% LaTeX template, xeCJK usepackage and Unicode text need XeLaTeX to compile.
% MiKTeX package can be downloaded at miktex.org, WinEdt can be downloaded at http://www.winedt.com/.
% WinEdt 6 and higher provide XeLaTeX and Unicode support.

% LaTeX template version 1.0.3, last revised on 2014-11-14.

\documentclass[10pt]{article}



% *************************** installed packages *************************
% AMS packages
\usepackage{amsfonts}   % TeX fonts from the American Mathematical Society.
\usepackage{amsmath}    % AMS mathematical facilities for LaTeX.
\usepackage{amssymb}    % AMS symbols
\usepackage{amsthm}     % Provide proclamations environment.

% graphics packages
\usepackage{graphics}   % Standard LaTeX graphics.
\usepackage{tikz}       % TikZ and PGF package for graphics
\usetikzlibrary{matrix} % matrix library of TikZ package
\usetikzlibrary{trees}  % trees library of TikZ package

% support for foreign languages, esp. Chinese.
\usepackage{xeCJK}      % Support for CJK (Chinese, Japanese, Korean) documents in XeLaTeX.
\setCJKmainfont{SimSun} % MUST appear when xeCJK is loaded.
%\setCJKmainfont{DFKai-SB}          % 设置正文罗马族的CJK字体，影响 \rmfamily 和 \textrm 的字体。此处设为“标楷体”。
%\setCJKmainfont{SimSun}            % 设置正文罗马族的CJK字体，影响 \rmfamily 和 \textrm 的字体。此处设为“宋体”。
%\setCJKmonofont{MingLiU}           % 设置正文等宽族的CJK字体，影响 \ttfamily 和 \texttt 的字体。此处设为“细明体”。
%\renewcommand\abstractname{摘要}   % 重定义摘要名：abstract->摘要。
%\renewcommand\appendixname{附录}   % 重定义附录名：appendix->附录。
%\renewcommand\bibname{参考文献}    % 重定义参考文献名：bibliography->参考文献。
%\renewcommand\contentsname{目录}   % 重定义目录名：contents->目录。
%\renewcommand\refname{参考文献}    % 重定义参考文献名：references->参考文献。

% miscellaneous packages
\usepackage[toc, page]{appendix}   % Extra control of appendices.
\usepackage{caption}    % Customising captions in floating environments.
%\captionsetup[figure]{labelformat=empty} % redefines the caption setup of the figures environment in the beamer %class.
\usepackage{clrscode}   % Typesets pseudocode as in Introduction to Algorithms.
\usepackage{epsfig}     % NOTE(review): epsfig is obsolete; prefer graphicx (\includegraphics) for new material. Kept in case \epsfig is used elsewhere in the document.
\usepackage{eurosym}    % Metafont and macros for Euro sign.
\usepackage{float}      % Improved interface for floating objects.
\usepackage{fontspec}   % Advanced font selection in XeLaTeX and LuaLaTeX.
\usepackage{xcolor}     % Driver-independent color extensions for LaTeX and pdfLaTeX.

% must-be-the-last packages
\usepackage[driverfallback=hypertex, pagebackref]{hyperref}   % Extensive support for hypertext in LaTeX; MUST be on the last \usepackage line in the preamble. [pagebackref] for page referencing; [backref] for section referencing.
% ********************** end of installed packages ***********************



% ************************** fullpage.sty ********************************
% This is FULLPAGE.STY by H.Partl, Version 2 as of 15 Dec 1988.
% Document Style Option to fill the paper just like Plain TeX.
\typeout{Style Option FULLPAGE Version 2 as of 15 Dec 1988}

\topmargin 0pt \advance \topmargin by -\headheight \advance
\topmargin by -\headsep

\textheight 8.9in

\oddsidemargin 0pt \evensidemargin \oddsidemargin \marginparwidth
0.5in

\textwidth 6.5in
% For users of A4 paper: The above values are suited for American 8.5x11in
% paper. If your output driver performs a conversion for A4 paper, keep
% those values. If your output driver conforms to the TeX standard (1in/1in),
% then you should add the following commands to center the text on A4 paper:

% \advance\hoffset by -3mm  % A4 is narrower.
% \advance\voffset by  8mm  % A4 is taller.
% ************************ end of fullpage.sty ***************************



% ************** Proclamations (theorem-like structures) *****************
% [section] option provides numbering within a section.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}{Lemma}[section]
\newtheorem{definition}{Definition}[section]
\newtheorem{prop}{Proposition}[section]
\newtheorem{corollary}{Corollary}[section]
% remark and example are deliberately numbered globally (no [section] option),
% so they run 1, 2, 3, ... across the whole document.
\newtheorem{remark}{Remark}
\newtheorem{example}{Example}
% ************************************************************************



% ************* Solutions use a modified proof environment ***************
% Typesets exactly like amsthm's proof environment (including the end-of-proof
% tombstone), but with the heading ``Solution'' in place of ``Proof''.
\newenvironment{solution}
  {\begin{proof}[Solution]}
  {\end{proof}}
% ************************************************************************



% ************* Frequently used commands as shorthand ********************
\newcommand{\norm}{|\!|}
% ************************************************************************



\begin{document}
\title{ \Huge{\it Linear Algebra and Its Applications, 2ed.} \\ \Huge{Solution of Exercise Problems} }
\author{Yan Zeng}
\date{Version 1.0.4, last revised on 2014-08-13. }

\maketitle

\begin{abstract}
This is a solution manual for {\it Linear algebra and its applications}, 2nd edition, by Peter Lax \cite{Lax07}. This version omits the following problems: exercise 2, 9 of Chapter 8; exercise 3, 4 of Chapter 17; exercises of Chapter 18; exercise 3 of Appendix 3; exercises of Appendix 4, 5, 8 and 11.

If you would like to correct any typos/errors, please send email to zypublic@hotmail.com.
\end{abstract}

\tableofcontents

\newpage

\section{Fundamentals}

The book's own solution gives answers to Ex 1, 3, 7, 10, 13, 14, 16, 19, 20, 21.

\bigskip

\noindent $\blacktriangleright$ 1. (page 2) Show that the zero of vector addition is unique.
\begin{proof} Suppose $0$ and $0'$ are two zeros of
vector addition, then by the definition of zero and commutativity,
we have $0'=0'+0=0+0'=0$.
\end{proof}

\noindent $\blacktriangleright$ 2. (page 3) Show that the vector with all components zero serves as the zero element of classical vector addition.
\begin{proof} For any $x=(x_1,\cdots, x_n)\in K^n$, we
have
\[
x + 0 = (x_1,\cdots, x_n) + (0, \cdots, 0) = (x_1 + 0, \cdots, x_n +
0) = (x_1, \cdots, x_n) = x.
\]
So $0=(0,\cdots, 0)$ is {\it the} zero element of classical vector addition.
\end{proof}

\noindent $\blacktriangleright$ 3. (page 3) Show that (i) and (iv) are isomorphic.
\begin{proof}The isomorphism $T$ can be defined as
$T((a_1,\cdots,a_n))=a_1+a_2x+\cdots+a_nx^{n-1}$.\end{proof}

\noindent $\blacktriangleright$ 4. (page 3) Show that if $S$ has $n$ elements, (i) and (iii) are isomorphic.
\begin{proof}Suppose $S=\{s_1,\cdots, s_n\}$. The
isomorphism $T$ can be defined as $T(f)=(f(s_1),\cdots,f(s_n))$,
$\forall f \in K^{S}$.\end{proof}

\noindent $\blacktriangleright$ 5. (page 4) Show that when $K=\mathbb R$, (iv) is isomorphic with (iii) when $S$ consists of $n$ distinct points of $\mathbb R$.
\begin{proof} For any $p(x) = a_1 + a_2 x + \cdots +
a_nx^{n-1}$, we define
\[
T(p) = p(x),
\]
where $p$ on the left side of the equation is regarded as a
polynomial over $\mathbb R$ while $p(x)$ on the right side of the
equation is regarded as a function defined on $S=\{s_1, \cdots, s_n
\}$. To prove $T$ is an isomorphism, it suffices to prove $T$ is
one-to-one. This is seen through the observation that
\[
\left[
\begin{matrix}
1 & s_1 & s_1^2 & \cdots & s_1^{n-1} \\
1 & s_2 & s_2^2 & \cdots & s_2^{n-1} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
1 & s_n & s_n^2 & \cdots & s_n^{n-1}
\end{matrix}
\right] \left[ \begin{matrix} a_1 \\ a_2 \\ \vdots \\ a_n
\end{matrix} \right]
= \left[
\begin{matrix}
p(s_1) \\ p(s_2) \\ \cdots \\ p(s_n)
\end{matrix}
\right]
\]
and the Vandermonde matrix
\[
\left[
\begin{matrix}
1 & s_1 & s_1^2 & \cdots & s_1^{n-1} \\
1 & s_2 & s_2^2 & \cdots & s_2^{n-1} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
1 & s_n & s_n^2 & \cdots & s_n^{n-1}
\end{matrix}
\right]
\]
is invertible for distinct $s_1$, $s_2$, $\cdots$, $s_n$.
\end{proof}

\noindent $\blacktriangleright$ 6. (page 4) Prove that $Y+Z$ is a linear subspace of $X$ if $Y$ and $Z$ are.
\begin{proof}For any $y, y'\in Y$, $z, z'\in Z$ and
$k\in K$, we have (by commutativity and associative law)
\begin{eqnarray*}
(y+z)+(y'+z') &=&
(z+y)+(y'+z')=z+(y+(y'+z'))=z+((y+y')+z')=z+(z'+(y+y'))
\\
&=& (z+z')+(y+y') = (y+y')+(z+z') \in Y+Z,
\end{eqnarray*}
and
\[
k(y+z)=ky+kz \in Y+Z.
\]So $Y+Z$ is a linear subspace of $X$ if $Y$ and $Z$ are.
\end{proof}

\noindent $\blacktriangleright$ 7. (page 4) Prove that if $Y$ and $Z$ are linear subspaces of $X$, so is $Y\cap Z$.
\begin{proof} For any $x_1, x_2 \in Y\cap Z$, since $Y$
and $Z$ are linear subspaces of $X$, $x_1+x_2\in Y$ and $x_1+x_2\in
Z$. Therefore, $x_1+x_2\in Y\cap Z$. For any $k\in K$ and $x\in
Y\cap Z$, since $Y$ and $Z$ are linear subspaces of $X$, $kx\in Y$
and $kx \in Z$. Therefore, $kx\in Y\cap Z$. Combined, we conclude
$Y\cap Z$ is a linear subspace of $X$. \end{proof}

\noindent $\blacktriangleright$ 8. (page 4) Show that the set $\{0\}$ consisting of the zero element of a linear space $X$ is a subspace of $X$. It is called the {\it trivial subspace}.
\begin{proof} By definition of zero vector, $0+0=0\in
\{0\}$. For any $k\in K$, $k0=k(0+0)=k0+k0$. So $k0=0\in \{0\}$.
Combined, we can conclude $\{0\}$ is a linear subspace of
$X$.\end{proof}

\noindent $\blacktriangleright$ 9. (page 4) Show that the set of {\it all} linear combinations of $x_1$, $\cdots$, $x_j$ is a subspace of $X$, and that it is the smallest subspace of $X$ containing $x_1$, $\cdots$, $x_j$. This is called the {\it subspace spanned} by $x_1$, $\cdots$, $x_j$.
\begin{proof}Define $Y=\{k_1x_1+\cdots+k_jx_j:
k_1,\cdots, k_j\in K\}$. Then clearly $x_1=1x_1+0x_2+\cdots+0x_j\in
Y$. Similarly, we can show $x_2, \cdots, x_j\in Y$. Since for any $
k_1,\cdots,k_j,k_1',\cdots,k_j'\in K$,
\[
(k_1x_1+\cdots+k_jx_j)+(k_1'x_1+\cdots+k_j'x_j)=(k_1+k_1')x_1+\cdots+(k_j+k_j')x_j
\in Y\] and for any $k_1,\cdots,k_j,k\in K$,
\[
k(k_1x_1+\cdots+k_jx_j)=(kk_1)x_1+\cdots+(kk_j)x_j\in Y,
\]
we can conclude $Y$ is a linear subspace of $X$ containing
$x_1,\cdots,x_j$. Finally, if $Z$ is any linear subspace of $X$
containing $x_1,\cdots,x_j$, it is clear that  $Y\subset Z$ as $Z$
must be closed under scalar multiplication and vector addition.
Combined, we have proven $Y$ is the smallest linear subspace of $X$
containing $x_1,\cdots,x_j$.
\end{proof}

\noindent $\blacktriangleright$ 10. (page 5) Show that if the vectors $x_1$, $\cdots$, $x_j$ are linearly independent, then none of the $x_i$ is the zero vector.
\begin{proof}We prove by contradiction. Without loss
of generality, assume $x_1=0$. Then $1x_1+0x_2+\cdots+0x_j=0$. This
shows $x_1,\cdots,x_j$ are linearly dependent, a contradiction. So
$x_1\ne 0$. We can similarly prove $x_2, \cdots, x_j\ne 0$.
\end{proof}

\noindent $\blacktriangleright$ 11. (page 7) Prove that if $X$ is finite dimensional and the direct sum of $Y_1$, $\cdots$, $Y_m$, then
\[
\dim X=\sum \dim Y_j.
\]
\begin{proof}Suppose $Y_i$ has a basis
$y^i_1,\cdots,y^i_{n_i}$. Then it suffices to prove
$y^1_1,\cdots,y^1_{n_1},\cdots, y^m_1,\cdots,y^m_{n_m}$ form a basis
of $X$. By definition of direct sum, these vectors span $X$, so we
only need to show they are linearly independent. In fact, if not,
then $0$ has two distinct representations: $0=0+ \cdots + 0$ and $0
= \sum_{i=1}^m(a^i_1y^i_1+ \cdots + a^i_{n_i}y^i_{n_i})$ for some
$a^1_1,\cdots,a^1_{n_1},\cdots, a^m_1,\cdots,a^m_{n_m}$, where not
all $a^i_j$ are zero. This contradicts the definition of
direct sum. So we must have linear independence, which implies
$y^1_1,\cdots,y^1_{n_1},\cdots, y^m_1,\cdots,y^m_{n_m}$ form a basis
of $X$. Consequently, $\dim X = \sum \dim Y_i$.
\end{proof}

\noindent $\blacktriangleright$ 12. (page 7) Show that every finite-dimensional space $X$ over $K$ is isomorphic to $K^n$, $n = \dim X$. Show that this isomorphism is not unique when $n$ is $>1$.
\begin{proof}Fix a basis $x_1,\cdots, x_n$ of $X$, any
element $x\in X$ can be uniquely represented as
$\sum_{i=1}^n\alpha_i(x)x_i$ for some $\alpha_i(x)\in K$,
$i=1,\cdots, n$. We define the isomorphism as $x \mapsto
(\alpha_1(x),\cdots,\alpha_n(x))$. Clearly this isomorphism depends
on the basis and by varying the choice of basis, we can have
different isomorphisms.\end{proof}

\noindent $\blacktriangleright$ 13. (page 7) Prove (i)-(iii) above. Show furthermore that if $x_1\equiv x_2$, then $kx_1 \equiv kx_2$ for every scalar $k$.
\begin{proof} For any $x_1, x_2\in X$, if $x_1\equiv
x_2$, i.e. $x_1-x_2\in Y$, then $x_2-x_1=-(x_1-x_2)\in Y$, i.e.
$x_2\equiv x_1$. This is symmetry. For any $x\in X$, $x-x=0\in Y$.
So $x\equiv x$. This is reflexivity. Finally, if $x_1\equiv x_2$,
$x_2\equiv x_3$, then $x_1-x_3=(x_1-x_2)+(x_2-x_3) \in Y$, i.e.
$x_1\equiv x_3$. This is transitivity. \end{proof}

\noindent $\blacktriangleright$ 14. (page 7) Show that two congruence classes are either identical or disjoint.
\begin{proof} For any $x_1, x_2 \in X$, we can find $y
\in \{x_1\}\cap \{x_2\}$ if and only if $x_1-y\in Y$ and $x_2-y \in
Y$. Then
\[
x_1-x_2=(x_1-y) - (x_2-y) \in Y.
\]
So $\{x_1\}\cap\{x_2\}\ne \emptyset$ if and only if
$\{x_1\}=\{x_2\}$.
\end{proof}

\noindent $\blacktriangleright$ 15. (page 8) Show that the above definition of addition and multiplication by scalar is independent of the choice of representatives in the congruence class.
\begin{proof} If $\{x\}=\{x'\}$ and $\{y\}=\{y'\}$,
then $x-x', y-y'\in Y$. So $(x+y)-(x'+y') = (x-x')+(y-y')\in Y$.
This shows $\{x+y\}=\{x'+y'\}$. Also, for any $k\in K$,
$kx-kx'=k(x-x')\in Y$. So $k\{x\}=\{kx\}=\{kx'\}=k\{x'\}$.
\end{proof}

\noindent $\blacktriangleright$ 16. (page 9) Denote by $X$ the linear space of all polynomials $p(t)$ of degree $<n$, and denote by $Y$ the set of polynomials that are zero at $t_1$, $\cdots$, $t_j$, $j<n$.

(i) Show that $Y$ is a subspace of $X$.

(ii) Determine $\dim Y$.

(iii) Determine $\dim X/Y$.
\begin{proof} By theory of polynomials, we
have
\[
Y = \left\{q(t)\prod_{i=1}^j(t-t_i): \mbox{$q(t)$ is a polynomial of
degree $< n-j$}\right\}.
\]
Then it's easy to see $\dim Y = n-j$ and $\dim X/Y = \dim X - \dim Y
= j$.
\end{proof}

\noindent $\blacktriangleright$ 17. (page 10) Prove Corollary $6'$.
\begin{proof} By Theorem 6, $\dim X/Y = \dim X - \dim
Y = 0$, which implies $X/Y = \{\{0\}\}$. So $X=Y$.
\end{proof}

\noindent $\blacktriangleright$ 18. (page 11) Show that
\[
\dim X_1 \oplus X_2 = \dim X_1 + \dim X_2.
\]
\begin{proof} Define $Y_1= \{(x,0): x\in X_1, 0 \in
X_2\}$ and $Y_2=\{(0,x): 0\in X_1, x\in X_2\}$. Then $Y_1$ and $Y_2$
are linear subspaces of $X_1\oplus X_2$. It is easy to see $Y_1$ is
isomorphic to $X_1$, $Y_2$ is isomorphic to $X_2$, and $Y_1\cap Y_2
= \{(0,0)\}$. So by Theorem 7, $\dim X_1\oplus X_2 =
\dim Y_1 + \dim Y_2 - \dim (Y_1\cap
Y_2)=\dim X_1+\dim X_2-0=\dim X_1+\dim X_2$.\end{proof}

\noindent $\blacktriangleright$ 19. (page 11) $X$ is a linear space, $Y$ is a subspace. Show that $Y \oplus X/Y$ is isomorphic to $X$.

\begin{proof} By Exercise 18 and Theorem 6,
$\dim (Y\oplus X/Y) = \dim Y + \dim (X/Y)=
\dim Y  + \dim X - \dim Y=\dim X$. Since linear
spaces of the same finite dimension are isomorphic (by one-to-one
mapping between their bases), $Y\oplus X/Y$ is isomorphic to $X$.
\end{proof}

\noindent $\blacktriangleright$ 20. (page 12) Which of the following sets of vectors $x=(x_1, \cdots, x_n)$ in $\mathbb R^n$ are a subspace of $\mathbb R^n$? Explain your answer.

(a) All $x$ such that $x_1 \ge 0$.

(b) All $x$ such that $x_1 + x_2 = 0$.

(c) All $x$ such that $x_1 + x_2 + 1 = 0$.

(d) All $x$ such that $x_1 = 0$.

(e) All $x$ such that $x_1$ is an integer.
\begin{proof} (a) is not since $\{x: x_1 \ge 0\}$ is
not closed under the scalar multiplication by $-1$. (b) is. (c) is
not since $x_1+x_2+1=0$ and $x_1'+x_2'+1=0$ imply
$(x_1+x_1')+(x_2+x_2')+1=-1$. (d) is. (e) is not since $x_1$ being
an integer does not guarantee $rx_1$ is an integer for any $r\in
\mathbb R$.
\end{proof}

\noindent $\blacktriangleright$ 21. (page 12) Let $U$, $V$, and $W$ be subspaces of some finite-dimensional vector space $X$. Is the statement
\begin{eqnarray*}
\dim (U + V + W) &=& \dim U + \dim V + \dim W - \dim (U \cap V) - \dim (U \cap W) \\
& & - \dim (V \cap W) + \dim (U \cap V \cap W).
\end{eqnarray*}
true or false? If true, prove it. If false, provide a counterexample.
\begin{proof} (From the textbook's solutions, page 279.) The statement is false; here is an example to the contrary:
\begin{eqnarray*}
& & X = \mathbb R^2 = \mbox{$(x, y)$ space} \\
& & U = \{y=0\}, V = \{x=0\}, W = \{x=y\}. \\
& & U + V + W = \mathbb R^2, U \cap V = \{0\}, U \cap W = \{0\} \\
& & V \cap W = \{0\}, U \cap V \cap W = \{0\}.
\end{eqnarray*}
\end{proof}

\section{Duality}

The book's own solution gives answers to Ex 4, 5, 6, 7.

\bigskip

\noindent $\blacktriangleright$ 1. (page 15) Given a nonzero vector $x_1$ in $X$, show that there is a linear function $l$ such that
\[
l(x_1)\ne 0.
\]
\begin{proof} We let $Y=\{k x_1: k\in K\}$. Then $Y$ is a 1-dimensional linear subspace of $X$. By Theorem 2 and Theorem 4,
\[
\dim Y^{\perp} = \dim X - \dim Y < \dim X = \dim X'
\]
So there must exist some $l\in X' \setminus Y^{\perp}$ such that $l(x_1) \ne 0$.
\end{proof}
\begin{remark}
When $K$ is $\mathbb R$ or $\mathbb C$, the proof can be constructive. Indeed, assume $e_1$, $\cdots$, $e_n$ is a basis for $X$ and $x_1 = \sum_{i=1}^n a_i e_i$. In the case of $K=\mathbb R$, define $l$ by setting $l(e_i)=a_i$, $i=1,\cdots,n$; in the case of $K=\mathbb C$, define $l$ by setting $l(e_i)=a_i^*$ (the conjugate of $a_i$), $i=1,\cdots, n$. Then in both cases, $l(x_1) = \sum_{i=1}^n|a_i|^2 > 0$.
\end{remark}

\noindent $\blacktriangleright$ 2. (page 15) Verify that $Y^{\perp}$ is a subspace of $X'$.
\begin{proof} For any $l_1$ and $l_2 \in Y^{\perp}$, we
have $(l_1+l_2)(y) = l_1(y)+l_2(y) = 0+0=0$ for any $y\in Y$. So
$l_1+l_2\in Y^{\perp}$. For any $k\in K$, $(kl)(y)=k(l(y))=k0=0$ for
any $y\in Y$. So $kl\in Y^{\perp}$. Combined, we conclude
$Y^{\perp}$ is a subspace of $X'$.\end{proof}

\noindent $\blacktriangleright$ 3. (page 17) Prove Theorem 6.
\begin{proof} Since $S\subset Y$, $Y^{\bot}\subset
S^{\bot}$. For ``$\supset$'', let $x_1,\cdots, x_m$ be a maximal
linearly independent subset of $S$. Then $S=\mbox{span}(x_1,\cdots,
x_m)$ and $Y=\{\sum_{i=1}^m\alpha_ix_i: \alpha_1,\cdots, \alpha_m\in
K\}$ by Exercise 9 of Chapter 1. By the definition of annihilator,
for any $l \in S^{\bot}$ and $y=\sum_{i=1}^m\alpha_ix_i \in Y$, we
have
\[
l(y) = \sum_{i=1}^m\alpha_il(x_i) = 0.
\]
So $l\in Y^{\bot}$. By the arbitrariness of $l$, $S^{\bot}\subset
Y^{\bot}$. Combined, we have $S^{\bot} = Y^{\bot}$.
\end{proof}

\noindent $\blacktriangleright$ 4. (page 18) In Theorem 7 take the interval $I$ to be $[-1,1]$, and take $n$ to be $3$. Choose the three points to be $t_1=-a$,
$t_2=0$, and $t_3=a$.

(i) Determine the weights $m_1$, $m_2$, $m_3$ so that (9) holds for all polynomials of degree $<3$.

(ii) Show that for $a>\sqrt{1/3}$, all three weights are positive.

(iii) Show that for $a=\sqrt{3/5}$, (9) holds for all polynomials of degree $<6$.

\begin{proof} Suppose three linearly independent
polynomials $p_1$, $p_2$ and $p_3$ are applied to formula (9). Then
$m_1$, $m_2$ and $m_3$ must satisfy the linear equations
\[
\left[
\begin{matrix}
p_1(t_1) & p_1(t_2) & p_1(t_3) \\
p_2(t_1) & p_2(t_2) & p_2(t_3) \\
p_3(t_1) & p_3(t_2) & p_3(t_3)
\end{matrix}
\right] \left[
\begin{matrix}
m_1\\
m_2\\
m_3
\end{matrix}
\right]= \left[
\begin{matrix}
\int_{-1}^1p_1(t)dt \\
\int_{-1}^1p_2(t)dt \\
\int_{-1}^1p_3(t)dt
\end{matrix}
\right]
\]
We take $p_1(t)=1$, $p_2(t)=t$ and $p_3(t)=t^2$. The above equation
becomes
\[
\left[
\begin{matrix}
1 & 1 & 1 \\
-a & 0 & a \\
a^2 & 0 & a^2
\end{matrix}
\right] \left[
\begin{matrix}
m_1\\
m_2\\
m_3
\end{matrix}
\right]= \left[
\begin{matrix}
2 \\
0 \\
\frac{2}{3}
\end{matrix}
\right]
\]
So
\[
 \left[
\begin{matrix}
m_1\\
m_2\\
m_3
\end{matrix}
\right]=\left[
\begin{matrix}
1 & 1 & 1 \\
-a & 0 & a \\
a^2 & 0 & a^2
\end{matrix}
\right]^{-1} \left[
\begin{matrix}
2 \\
0 \\
\frac{2}{3}
\end{matrix}
\right] = \left[
\begin{matrix}
0 & -\frac{1}{2a} & \frac{1}{2a^2} \\
1 & 0 & -\frac{1}{a^2} \\
0 & \frac{1}{2a} & \frac{1}{2a^2}
\end{matrix}
\right]^{-1} \left[
\begin{matrix}
2 \\
0 \\
\frac{2}{3}
\end{matrix}
\right] = \left[
\begin{matrix}
\frac{1}{3a^2}\\
2-\frac{2}{3a^2} \\
\frac{1}{3a^2}
\end{matrix}
\right]
\]
Then it's easy to see that for $a>\sqrt{1/3}$, all three weights are
positive.

To show formula (9) holds for all polynomials of degree $<6$ when $a=\sqrt{3/5}$, we note for any odd $n\in \mathbb N$,
\[
\int_{-1}^1x^ndx = 0,\; m_1p(-a)+m_3p(a) = 0\; \mbox{since $m_1=m_3$ and $p(-x)=-p(x)$},\; \mbox{and} \; m_2p(0)=0.
\]
So (9) holds for any $x^n$ of odd degree $n$. In particular, for $p(x)=x^3$ and $p(x)=x^5$. For $p(x)=x^4$, we have
\[
\int_{-1}^1x^4dx = \frac{2}{5},\; m_1p(t_1)+m_2p(t_2)+m_3p(t_3) = 2m_1a^4 = \frac{2}{3}a^2.
\]
So formula (9) holds for $p(x)=x^4$ when $a= \sqrt{3/5}$. Combined, we conclude for $a=\sqrt{3/5}$, (9) holds for all polynomials of degree $<6$.
\end{proof}
\begin{remark}
In this exercise problem and Exercise 5 below, ``Theorem 6'' is corrected to ``Theorem 7''.
\end{remark}

\noindent $\blacktriangleright$ 5. (page 18) In Theorem 7 take the interval $I$ to be $[-1, 1]$, and take $n=4$. Choose the four points to be $-a$, $-b$, $b$, $a$.

(i) Determine the weights $m_1$, $m_2$, $m_3$, and $m_4$ so that (9) holds for all polynomials of degree $<4$.

(ii) For what values of $a$ and $b$ are the weights positive?

\begin{proof} We take $p_1(t)=1$, $p_2(t)=t$, $p_3(t)=t^2$, and $p_4(t)=t^3$. Then $m_1$, $m_2$, $m_3$, and $m_4$ solve the following equation:
\[
\left[
\begin{matrix}
1 & 1 & 1 & 1 \\
-a & -b & b & a \\
a^2 & b^2 & b^2 & a^2 \\
-a^3 & -b^3 & b^3 & a^3
\end{matrix}
\right]
\left[
\begin{matrix}
m_1 \\
m_2 \\
m_3 \\
m_4
\end{matrix}
\right]
=
\left[
\begin{matrix}
2 \\
0 \\
2/3 \\
0
\end{matrix}
\right]
\]
Then
\begin{eqnarray*}
\left[
\begin{matrix}
m_1 \\
m_2 \\
m_3 \\
m_4
\end{matrix}
\right]
&=&
\left[
\begin{matrix}
1 & 1 & 1 & 1 \\
-a & -b & b & a \\
a^2 & b^2 & b^2 & a^2 \\
-a^3 & -b^3 & b^3 & a^3
\end{matrix}
\right]^{-1}
\left[
\begin{matrix}
2 \\
0 \\
2/3 \\
0
\end{matrix}
\right]
\\
&=&
\left[
\begin{matrix}
\frac{b^2}{-2a^2+2b^2} & \frac{b^2}{2a^3-2ab^2} & \frac{1}{2a^2-2b^2} & \frac{1}{-2a^3+2ab^2} \\
\frac{a^2}{2a^2-2b^2} & \frac{a^2}{-2a^2b+2b^3} & \frac{1}{-2a^2+2b^2} & \frac{1}{2a^2b-2b^3} \\
\frac{a^2}{2a^2-2b^2} & \frac{a^2}{2a^2b-2b^3} & \frac{1}{-2a^2+2b^2} & \frac{1}{-2a^2b+2b^3} \\
\frac{b^2}{-2a^2+2b^2} & \frac{b^2}{-2a^3+2ab^2} & \frac{1}{2a^2-2b^2} & \frac{1}{2a^3-2ab^2}
\end{matrix}
\right]
\left[
\begin{matrix}
2 \\
0 \\
2/3 \\
0
\end{matrix}
\right]
\\
&=&
\left[
\begin{matrix}
\frac{-3b^2+1}{3(a^2-b^2)}\\
\frac{3a^2-1}{3(a^2-b^2)} \\
\frac{3a^2-1}{3(a^2-b^2)} \\
\frac{-3b^2+1}{3(a^2-b^2)}
\end{matrix}
\right]
\end{eqnarray*}
So the weights are positive if and only if one of the following two mutually exclusive cases holds:
\newline 1) $b^2>\frac{1}{3}$, $a^2<b^2$, $a^2>\frac{1}{3}$;
\newline 2) $b^2<\frac{1}{3}$, $a^2>b^2$, $a^2<\frac{1}{3}$.

\end{proof}

\noindent $\blacktriangleright$ 6. (page 18) Let ${\cal P}_2$ be the linear space of all polynomials
\[
p(x) = a_0 + a_1 x + a_2 x^2
\]
with real coefficients and degree $\le 2$. Let $\xi_1$, $\xi_2$, $\xi_3$ be three distinct real numbers, and then define
\[
l_j =  p(\xi_j) \; \mbox{for $j=1, 2, 3$.}
\]

(a) Show that $l_1$, $l_2$, $l_3$ are linearly independent linear functions on ${\cal P}_2$.

(b) Show that $l_1$, $l_2$, $l_3$ is a basis for the dual space ${\cal P}_2'$.

(c) (1) Suppose $\{e_1, \cdots, e_n\}$ is a basis for the vector space $V$. Show there exist linear functions $\{l_1, \cdots, l_n\}$ in the dual space $V'$ defined by
\[
l_i(e_j) =
\begin{cases}
1 \; \mbox{if $i = j$} \\
0 \; \mbox{if $i\ne j$}
\end{cases}
\]
Show that $\{l_1, \cdots, l_n\}$ is a basis of $V'$, called the {\it dual basis}.

\hspace{.16in} (2) Find the polynomials $p_1(x)$, $p_2(x)$, $p_3(x)$ in ${\cal P}_2$ for which $l_1$, $l_2$, $l_3$ is the dual basis in ${\cal P}_2'$.

\medskip

(a) \begin{proof} (From the textbook's solutions, page 280)
 Suppose there is a linear relation
\[
al_1(p)+bl_2(p)+cl_3(p) = 0.
\]
Set $p=p(x)=(x-\xi_2)(x-\xi_3)$. Then $p(\xi_2)=p(\xi_3)=0$, $p(\xi_1)\ne 0$; so we get from the above relation that $a=0$. Similarly $b=0$, $c=0$.
\end{proof}

(b) \begin{proof} Since $\dim {\cal P}_2=3$, $\dim {\cal P}_2' = 3$. Since $l_1$, $l_2$, $l_3$ are linearly independent, they span ${\cal P}_2'$. \end{proof}

(c1) \begin{proof} We define $l_1$ by setting
\[
l_1(e_j)=\begin{cases} 1, & \mbox{if $j=1$} \\ 0, & \mbox{if $j\ne 1$}\end{cases}
\]
and extending $l_1$ to $V$ by linear combination, i.e. $l_1(\sum_{j=1}^n\alpha_je_j):=\sum_{j=1}^n\alpha_jl_1(e_j)=\alpha_1$. $l_2$, $\cdots$, $l_n$ can be constructed similarly. If there exist $a_1$, $\cdots$, $a_n$ such that $a_1l_1+\cdots+a_nl_n=0$, we have
\[
0 = a_1l_1(e_j) + \cdots + a_n l_n(e_j) = a_j, \; j=1, \cdots, n.
\]
So $l_1$, $\cdots$, $l_n$ are linearly independent. Since $\dim V' = \dim V = n$, $\{l_1,\cdots,l_n\}$ is a basis of $V'$.
\end{proof}

(c2) \begin{proof} We define
\[
p_1(x) = \frac{(x-\xi_2)(x-\xi_3)}{(\xi_1-\xi_2)(\xi_1-\xi_3)},\; p_2(x) = \frac{(x-\xi_1)(x-\xi_3)}{(\xi_2-\xi_1)(\xi_2-\xi_3)}, \; p_3(x) = \frac{(x-\xi_1)(x-\xi_2)}{(\xi_3-\xi_1)(\xi_3-\xi_2)}.
\]

\end{proof}

\noindent $\blacktriangleright$ 7. (page 18) Let $W$ be the subspace of $\mathbb R^4$ spanned by $(1,0,-1,2)$ and $(2,3, 1,1)$. Which linear functions $l(x)=c_1 x_1 + c_2 x_2 + c_3 x_3 + c_4 x_4$
are in the annihilator of $W$?
\begin{proof} (From the textbook's solutions, page 280) $l(x)$ has to be zero for $x=(1,0,-1,2)$ and $x=(2,3,1,1)$. These yield two equations for $c_1$, $\cdots$, $c_4$:
\[
c_1-c_3+2c_4=0,\; 2c_1+3c_2+c_3+c_4 = 0.
\]
We express $c_1$ and $c_2$ in terms of $c_3$ and $c_4$. From the first equation, $c_1=c_3-2c_4$. Setting this into the second equation gives $c_2=-c_3+c_4$.
 \end{proof}

\section{Linear Mappings}

The book's own solution gives answers to Ex 1, 2, 4, 5, 6, 7, 8, 10, 11, 13.

\bigskip

$\bigstar$ {\bf Comments}: To memorize Theorem 5 ($R_T^{\perp} = N_{T'}$), recall for a given $l \in U'$, $(l, Tx) = 0$ for any $x\in X$ if and only if $T'l = 0$.

\bigskip

\noindent $\blacktriangleright$ 1. (page 20) Prove Theorem 1.

\smallskip

(a)\begin{proof}For any $y, y' \in T(X)$, there exist $x,x' \in X$ such that $T(x)=y$ and $T(x')=y'$. So $y+y'=T(x)+T(x')=T(x+x') \in T(X)$. For any $k\in K$, $ky=kT(x)=T(kx)\in T(X)$. Combined, we conclude $T(X)$ is a linear subspace of $U$. \end{proof}

(b) \begin{proof} Suppose $V$ is a linear subspace of $U$. For any $x, x'\in T^{-1}(V)$, there exist $y,y' \in V$ such that $T(x)=y$ and $T(x')=y'$. Since $T(x+x')=T(x)+T(x')=y+y' \in V$, $x+x'\in T^{-1}(V)$. For any $k\in K$, since $T(kx)=kT(x)=ky\in V$, $kx\in T^{-1}(V)$. Combined, we conclude $T^{-1}(V)$ is a linear subspace of $X$.
\end{proof}

\noindent $\blacktriangleright$ 2. (page 24) Let
\[
\sum_{1}^n t_{ij} x_j = u_i, \; i = 1, \cdots, m
\]
be an overdetermined system of linear equations--that is, the number $m$ of equations is greater than the number $n$ of unknowns $x_1$, $\cdots$, $x_n$. Take the case that in spite of the overdeterminacy, this system of equations has a solution, and assume that this solution is unique. Show that it is possible to select a subset of $n$ of these equations which uniquely determine the solution.

\begin{proof} (From the textbook's solution, page 280) Suppose we drop the $i$th equation; if the remaining equations do not determine $x$ uniquely, there is an $x \ne 0$ that is mapped into a vector whose components except the $i$th are zero. If this were true for all $i=1,\cdots, m$, the range of the mapping $x\to u$ would be $m$-dimensional; but according to Theorem 2, the dimension of the range is $\le n < m$. Therefore one of the equations may be dropped without losing uniqueness; by induction $m-n$ of the equations may be omitted.

{\it Alternative solution}: Uniqueness of the solution $x$ implies the column vectors of the matrix $T=(t_{ij})$ are linearly independent. Since the column rank of a matrix equals its row rank (see Chapter 3, Theorem 6 and Chapter 4, Theorem 2), it is possible to select a subset of $n$ of these equations which uniquely determine the solution.
 \end{proof}
 \begin{remark}
 The textbook's solution is a proof that the column rank of a matrix equals its row rank.
 \end{remark}

\noindent $\blacktriangleright$ 3. (page 25) Prove Theorem 3.

\smallskip

(i) \begin{proof} $S\circ T(ax + by) = S(T(ax+by))=S(aT(x)+bT(y))=aS(T(x))+bS(T(y))=a S\circ T(x)+bS\circ T(y)$. So $S\circ T$ is also a linear mapping.
\end{proof}

(ii) \begin{proof} $(R+S)\circ T(x) = (R+S)(T(x))=R(T(x))+S(T(x))=(R\circ T + S\circ T)(x)$ and $S\circ (T+P)(x) = S((T+P)(x))=S(T(x)+P(x))=S(T(x))+S(P(x))=(S\circ T + S\circ P)(x)$.
\end{proof}

\noindent $\blacktriangleright$ 4. (page 25) Show that $S$ and $T$ in Examples 8 and 9 are linear and that $ST \ne TS$.
\begin{proof} For Example 8, the linearity of $S$ and $T$ is easy to see. To see the non-commutativity, consider the polynomial $p(s)=s$. We have $TS(s)=T(s^2)=2s\ne s = S(1)=ST(s)$. So $ST \ne TS$.

For Example 9, $\forall x=(x_1,x_2,x_3)\in X$, $S(x)=(x_1,x_3,-x_2)$ and $T(x)=(x_3,x_2,-x_1)$. So it's easy to see $S$ and $T$ are linear. To see the non-commutativity, note $ST(x)=S(x_3,x_2,-x_1)=(x_3,-x_1,-x_2)$ and $TS(x)=T(x_1,x_3,-x_2)=(-x_2,x_3,-x_1)$. So $ST\ne TS$ in general.
\end{proof}
\begin{remark}
Note the problem does not specify the direction of the rotation, so it is also possible that $S(x)=(x_1,-x_3,x_2)$ and $T(x)=(-x_3,x_2,x_1)$. There are total of four choices of $(S,T)$, and each of the corresponding proofs is similar to the one presented above.
\end{remark}

\noindent $\blacktriangleright$ 5. (page 25) Show that if $T$ is invertible, $TT^{-1}$ is the identity.
\begin{proof}$TT^{-1}(x)=T(T^{-1}(x))=x$ by definition. So $TT^{-1}=id$. \end{proof}

\noindent $\blacktriangleright$ 6. (page 25) Prove Theorem 4.

\smallskip

(i)\begin{proof}Suppose $T: X \to U$ is invertible. Then for any $y, y' \in U$, there exist a unique $x\in X$ and a unique $x'\in X$ such that $T(x)=y$ and $T(x')=y'$. So $T(x+x')=T(x)+T(x')=y+y'$ and by the injectivity of $T$, $T^{-1}(y+y')=x+x'=T^{-1}(y)+T^{-1}(y')$. For any $k\in K$, since $T(kx)=kT(x)=ky$, injectivity of $T$ implies $T^{-1}(ky)=kx=kT^{-1}(y)$. Combined, we conclude $T^{-1}$ is linear.
\end{proof}

(ii)\begin{proof}Suppose $T: X\to U$ and $S: U\to V$. First, by the definition of multiplication, $ST$ is a linear map. Second, if $x\in X$ is such that $ST(x)=0 \in V$, the injectivity of $S$ implies $T(x)=0 \in U$ and the injectivity of $T$ further implies $x=0 \in X$. So, $ST$ is one-to-one. For any $z\in V$, there exists $y\in U$ such that $S(y)=z$. Also, we can find $x\in X$ such that $T(x)=y$. So $ST(x)=S(y)=z$. This shows $ST$ is onto. Combined, we conclude $ST$ is invertible.

By associativity, we have $(ST)(T^{-1}S^{-1})=((ST)T^{-1})S^{-1}=(S(TT^{-1}))S^{-1}=SS^{-1}=\mbox{id}_V$. Replace $S$ with $T^{-1}$ and $T$ with $S^{-1}$, we also have $(T^{-1}S^{-1})(ST)=\mbox{id}_{X}$. Therefore, we can conclude $(ST)^{-1}=T^{-1}S^{-1}$.
\end{proof}

\noindent $\blacktriangleright$ 7. (page 26) Show that whenever meaningful,
\[
(ST)'=T'S', (T+R)'=T'+R', \;\mbox{and}\; (T^{-1})'=(T')^{-1}.
\]

\smallskip

(i)
\begin{proof}Suppose $T: X\to U$ and $S: U\to V$ are linear maps. Then for any given $l\in V'$, $((ST)'l,x)=(l,STx)=(S'l,Tx)=(T'S'l,x)$, $\forall x\in X$. Therefore, $(ST)'l=T'S'l$. Let $l$ run through every element of $V'$, we conclude $(ST)'=T'S'$.
\end{proof}

(ii)
\begin{proof}
Suppose $T$ and $R$ are both linear maps from $X$ to $U$. For any given $l\in U'$, we have $((T+R)'l,x)=(l,(T+R)x)=(l,Tx+Rx)=(l,Tx)+(l,Rx)=(T'l,x)+(R'l,x)=((T'+R')l,x)$, $\forall x \in X$. Therefore $(T+R)'l=(T'+R')l$. Let $l$ run through every element of $U'$, we conclude $(T+R)'=T'+R'$.
\end{proof}

(iii)
\begin{proof}
Suppose $T$ is an isomorphism from $X$ to $U$, then $T^{-1}$ is a well-defined linear map. We first show $T'$ is an isomorphism from $U'$ to $X'$. Indeed,  if $l\in U'$ is such that $T'l=0$, then for any $x\in X$, $0=(T'l,x)=(l,Tx)$. As $x$ varies and goes through every element of $X$, $Tx$ goes through every element of $U$. By considering the identification of $U$ with $U''$, we conclude $l=0$. So $T'$ is one-to-one. For any given $m\in X'$, define $l=mT^{-1}$, then $l\in U'$. For any $x\in X$, we have $(m,x)=(m,T^{-1}(Tx))=(l,Tx)=(T'l,x)$. Since $x$ is arbitrary, $m=T'l$ and $T'$ is therefore onto. Combined, we conclude $T'$ is an isomorphism from $U'$ to $X'$ and $(T')^{-1}$ is hence well-defined.

By part (i), $(T^{-1})'T'=(TT^{-1})'=(\mbox{id}_U)'=\mbox{id}_{U'}$ and $T'(T^{-1})'=(T^{-1}T)'=(\mbox{id}_X)'=\mbox{id}_{X'}$. This shows $(T^{-1})'=(T')^{-1}$.
\end{proof}

\noindent $\blacktriangleright$ 8. (page 26) Show that if $X''$ is identified with $X$ and $U''$ with $U$ via (5) in Chapter 2, then
\[
T''=T.
\]
\begin{proof} Suppose $\xi: X \to X''$ and $\eta: U\to U''$ are the isomorphisms defined in Chapter 2, formula (5), which identify $X$ with $X''$ and $U$ with $U''$, respectively. Then for any $x\in X$ and $l\in U'$, we have
\[
(T''\xi_x,l)=(\xi_x,T'l)=(T'l,x)=(l,Tx)=(\eta_{Tx},l).
\]Since $l$ is arbitrary, we must have $T''\xi_x=\eta_{Tx}$, $\forall x \in X$. Hence, $T''\circ\xi=\eta\circ T$, which is the precise interpretation of $T''=T$.
\end{proof}

\noindent $\blacktriangleright$ 9. (page 28) Show that if $A$ in ${\cal L}(X,X)$ is a left inverse of $B$ in ${\cal L}(X,X)$, that is $AB=I$, then it is also a right inverse: $BA=I$.
\begin{proof} If $Bx=0$, by applying $A$ to both sides of the equation and $AB=I$, we conclude $x=0$. So $B$ is injective. By Corollary B of Theorem 2, $B$ is surjective.  Therefore the inverse of $B$, denoted by $B^{-1}$, always exists, and $A=A(BB^{-1})=(AB)B^{-1}=IB^{-1}=B^{-1}$, which implies $BA=I$. \end{proof}
\begin{remark}
For a general algebraic structure, e.g. a ring with unit, it is not always the case that an element's right inverse equals its left inverse. In the proof above, we used the fact that for a finite-dimensional linear vector space, a linear mapping is injective if and only if it is surjective.
\end{remark}

\noindent $\blacktriangleright$ 10. (page 30) Show that if $M$ is invertible, and similar to $K$, then $K$ also is invertible, and $K^{-1}$ is similar to $M^{-1}$.
\begin{proof} Suppose $K=M_S$. Then $K(M^{-1})_{S}=SMS^{-1}SM^{-1}S^{-1}=I$. By Exercise 9, $K$ is also invertible and $K^{-1}=(M^{-1})_S$. \end{proof}

\noindent $\blacktriangleright$ 11. (page 30) Prove Theorem 9.
\begin{proof}
Suppose $A$ is invertible, we have $AB=AB(AA^{-1}) = A(BA)A^{-1}$. So $AB$ and $BA$ are similar. The case of $B$ being invertible can be proved similarly.
\end{proof}

\noindent $\blacktriangleright$ 12. (page 31) Show that $P$ defined above is a linear map, and that it is a projection.
\begin{proof}For any $\alpha,\beta \in K$ and $x=(x_1,\cdots,x_n)$, $y=(y_1,\cdots,y_n)$, we have
\begin{eqnarray*}
P(\alpha x+ \beta y)&=&P((\alpha x_1+\beta y_1,\cdots,\alpha x_n+\beta y_n))\\
&=&(0,0,\alpha x_3+\beta y_3,\cdots,\alpha x_n+\beta y_n)\\
&=&(0,0,\alpha x_3,\cdots,\alpha x_n)+(0,0,\beta y_3,\cdots,\beta y_n)\\
&=&\alpha(0,0,x_3,\cdots,x_n)+\beta(0,0,y_3,\cdots,y_n)\\
&=&\alpha P(x)+\beta P(y).
\end{eqnarray*}
This shows $P$ is a linear map. Furthermore, we have
\[
P^2(x)=P((0,0,x_3,\cdots,x_n))=(0,0,x_3,\cdots,x_n)=P(x).
\]
So $P$ is a projection.
\end{proof}

\noindent $\blacktriangleright$ 13. (page 31) Prove that $P$ defined above is linear, and that it is a projection.
\begin{proof}For any $\alpha,\beta\in K$ and $f,g\in C[-1,1]$, we have
\begin{eqnarray*}
P(\alpha f+\beta g)(x)
&=& \frac{1}{2}[(\alpha f+\beta g)(x)+(\alpha f+\beta g)(-x)] \\
&=& \frac{\alpha}{2}[f(x)+ f(-x)]+\frac{\beta}{2}[g(x)+ g(-x)] \\
&=& \alpha P(f)(x) + \beta P(g) (x).
\end{eqnarray*}
This shows $P$ is a linear map. Furthermore, we have
\begin{eqnarray*}
(P^2f)(x)
&=&(P(Pf))(x)=P\left(\frac{f(\cdot)+f(-\cdot)}{2}\right)(x)
=\frac{1}{2}\left[\frac{f(x)+f(-x)}{2}+\frac{f(-x)+f(x)}{2}\right]\\
&=&\frac{1}{2}(f(x)+f(-x))=(Pf)(x).
\end{eqnarray*}
So $P$ is a projection.
\end{proof}

\noindent $\blacktriangleright$ 14. (page 31) Suppose $T$ is a linear map of rank 1 of a finite dimensional vector space into itself.

(a) Show there exists a unique number $c$ such that $T^2=cT$.

(b) Show that if $c\ne 1$ then $I-T$ has an inverse. (As usual $I$ denotes the identity map $Ix=x$.)

\smallskip

(a)
\begin{proof}
Since $\dim R_T=1$, it suffices to prove the following claim: {\it if $T$ is a linear map on a 1-dimensional linear vector space $X$, there exists a unique number $c$ such that $T(x)=cx$, $\forall x \in X$}. We assume the underlying field $K$ is either $\mathbb R$ or $\mathbb C$. We further assume $S: X\to K$ is an isomorphism. Then $S\circ T\circ S^{-1}$ is a linear map on $K$. Define $c=S\circ T \circ S^{-1}(1)$, we have
\[
S\circ T \circ S^{-1}(k)=S\circ T \circ S^{-1}(k\cdot 1) = k\cdot c, \;\forall k \in K.
\]
So  $T \circ S^{-1}(k)=S^{-1}(c\cdot k)=cS^{-1}(k)$, $\forall k \in K$. This shows $T$ is a scalar multiplication.
\end{proof}

(b)
\begin{proof}
If $c\ne 1$, it's easy to verify $I+\frac{1}{1-c}T$ is the inverse of $I-T$.
\end{proof}

\noindent $\blacktriangleright$ 15. (page 31) Suppose $T$ and $S$ are linear maps of a finite dimensional vector space into itself. Show that the rank of $ST$ is less than or equal the rank of $S$. Show that the dimension of the nullspace of $ST$ is less than or equal the sum of the dimensions of the nullspaces of $S$ and of $T$.
\begin{proof} Because $R_{ST}\subset R_S$, $\mbox{rank}(ST) = \dim
(R_{ST}) \le \dim R_S = \mbox{rank}(S)$. Moreover, since the column rank of a matrix equals its row rank (see Chapter 3, Theorem 6 and Chapter 4, Theorem 2), we have $\mbox{rank}(ST)=\mbox{rank}(T'S') \le \mbox{rank}(T')=\mbox{rank}(T)$. Combined, we conclude $\mbox{rank}(ST) \le \min\{\mbox{rank}(S), \mbox{rank}(T)\}$.

Also, we note $N_{ST}/N_T$ is isomorphic to
$N_S\cap R_T$, with the isomorphism defined by $\phi(\{x\})=Tx$,
where $\{x\}:=x+N_T$. It's easy to see $\phi$ is well-defined, is
linear, and is both injective and surjective. So by Theorem 6 of
Chapter 1,
\[
\dim N_{ST} = \dim N_T + \dim N_{ST}/N_T = \dim N_T + \dim (N_S\cap
R_T) \le \dim N_T + \dim N_S.
\]
\end{proof}

\begin{remark}
The result $\mbox{rank}(ST) \le \min\{\mbox{rank}(S), \mbox{rank}(T)\}$ is used in econometrics. Cf. Greene \cite[page~985]{Greene11} Appendix A.
\end{remark}


\section{Matrices}
The book's own solution gives answers to Ex 1, 2, 4.

\bigskip

\noindent $\blacktriangleright$ 1. (page 35) Let $A$ be an arbitrary $m \times n$ matrix, and let $D$ be an $m \times n$ diagonal matrix,
\[
D_{ij}=
\begin{cases}
d_i \; \mbox{if $i=j$} \\
0   \; \mbox{if $i\ne j$}.
\end{cases}
\]
Show that the $i$th row of $DA$ equals $d_i$ times the $i$th row of $A$, and show that the $j$th column of $AD$ equals $d_j$ times the $j$th column of $A$.

\begin{proof} It seems the phrasing of the exercise is problematic: when $m\ne n$, $AD$ or $DA$ may not be well-defined. So we will assume $m=n$ below. We can write $A$ in the row form $\left[\begin{matrix}r_1 \\ r_2 \\ \vdots \\ r_n\end{matrix}\right]$. Then $DA$ can be written as
\[
DA = \left[
\begin{matrix}
d_1 & 0 & \cdots & 0 \\ 0 & d_2 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & \cdots & d_n
\end{matrix}
\right]
\left[\begin{matrix}r_1 \\ r_2 \\ \vdots \\ r_n\end{matrix}\right] = \left[\begin{matrix} d_1r_1 \\ d_2r_2 \\ \vdots \\ d_nr_n \end{matrix}\right]
\]
We can also write $A$ in the column form $[c_1, c_2, \cdots, c_n]$, then $AD$ can be written as
\[
AD = [c_1,c_2,\cdots,c_n]\left[
\begin{matrix}
d_1 & 0 & \cdots & 0 \\ 0 & d_2 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & \cdots & d_n
\end{matrix}
\right]
 = \left[d_1c_1, d_2c_2, \cdots, d_nc_n\right]
\]
\end{proof}

\noindent $\blacktriangleright$ 2. (page 37) Look up in any text the proof that the row rank of a matrix equals its column rank, and compare it to the proof given in the present text.
\begin{proof} Proofs in most textbooks are lengthy and complicated. For a clear, although still lengthy, proof, see 丘维声 \cite[page~112]{Qiu96a}, Theorem 3.5.3. \end{proof}

\noindent $\blacktriangleright$ 3. (page 38) Show that the product of two matrices in $2\times 2$ block form can be evaluated as
\[
\left(
  \begin{array}{cc}
    A_{11} & A_{12} \\
    A_{21} & A_{22} \\
  \end{array}
\right)
\left(
  \begin{array}{cc}
    B_{11} & B_{12} \\
    B_{21} & B_{22} \\
  \end{array}
\right)
=
\left(
  \begin{array}{cc}
    A_{11}B_{11} + A_{12}B_{21} & A_{11}B_{12} + A_{12}B_{22} \\
    A_{21}B_{11} + A_{22}B_{21} & A_{21}B_{12} + A_{22}B_{22} \\
  \end{array}
\right)
\]
\begin{proof}The calculation is a bit messy. We refer the reader to 丘维声\cite[page~190]{Qiu96a}, Theorem 4.6.1. \end{proof}


\noindent $\blacktriangleright$ 4. (page 38) Construct two $2\times 2$ matrices $A$ and $B$ such that $AB=0$ but $BA\ne 0$.
\begin{proof} Let $A=\left[\begin{matrix}1 & 1 \\ 0 & 0 \end{matrix}\right]$ and $B=\left[\begin{matrix}1 & 2 \\ -1 & -2\end{matrix}\right]$. Then $AB=0$ yet $BA=\left[\begin{matrix}1 & 1 \\ -1 & -1 \end{matrix}\right]\ne 0$. \end{proof}

\noindent $\blacktriangleright$ 5. (page 40) Show that $x_1$, $x_2$, $x_3$, and $x_4$ given by $(20)_j$ satisfy all four equations (20).
\begin{proof}
\[
\left[
\begin{matrix}
1 & 2 & 3 & -1 \\
2 & 5 & 4 & -3 \\
2 & 3 & 4 & 1 \\
1 & 4 & 2 & -2
\end{matrix}
\right]
\left[
\begin{matrix}
1 \\
2\\
-2\\
1
\end{matrix}
\right]
=
\left[
\begin{matrix}
1\cdot 1 + 2 \cdot 2 + 3 \cdot (-2) + (-1)\cdot 1 \\
2\cdot 1 + 5 \cdot 2 + 4 \cdot (-2) + (-3)\cdot 1 \\
2\cdot 1 + 3 \cdot 2 + 4 \cdot (-2) + 1 \cdot 1 \\
1\cdot 1 + 4 \cdot 2 + 2 \cdot (-2) + (-2)\cdot 1
\end{matrix}
\right]
=
\left[
\begin{matrix}
-2 \\
1 \\
1 \\
3
\end{matrix}
\right]
\]
 \end{proof}

\noindent $\blacktriangleright$ 6. (page 41) Choose values of $u_1$, $u_2$, $u_3$, $u_4$ so that condition (23) is satisfied, and determine all solutions of equations (22).
\begin{proof} We choose $u_1=u_2=u_3=1$ and $u_4=2$. Then $x_3=-5x_4-u_3-u_2+3u_1=-5x_4+1$, $x_2=7x_4+u_4-3u_1=7x_4-1$, and $x_1=u_1-x_2-2x_3-3x_4=1-(7x_4-1)-2(-5x_4+1)-3x_4=0$. \end{proof}

\noindent $\blacktriangleright$ 7. (page 41) Verify that $l=(1, -2, -1, 1)$ is a left nullvector of $M$:
\[
lM=0.
\]

\begin{proof}
\begin{eqnarray*}
& & [1,-2,-1,1]\left[\begin{matrix}1 & 1 & 2 & 3 \\ 1 & 2 &3 & 1 \\ 2 & 1 & 2 & 3 \\ 3 & 4 & 6 & 2\end{matrix}\right]\\
&=& [1\cdot 1-2\cdot 1 -1 \cdot 2+1\cdot 3, 1\cdot 1 -2 \cdot 2 -1 \cdot 1 + 1\cdot 4, 1\cdot 2 -2 \cdot 3 -1 \cdot 2 + 1\cdot 6, 1\cdot 3 -2 \cdot 1 -1 \cdot 3 + 1\cdot 2] \\
&=& 0.
\end{eqnarray*}
\end{proof}

\noindent $\blacktriangleright$ 8. (page 42) Show by Gaussian elimination that the only left nullvectors of $M$ are multiples of $l$ in Exercise 7, and then use Theorem 5 of Chapter 3 to show that condition (23) is sufficient for the solvability of the system (22).
\begin{proof} Suppose a row vector $x=(x_1,x_2,x_3,x_4)$ satisfies $xM=0$. Then we can proceed according to Gaussian elimination
\[
\begin{cases}
x_1 + x_2 + 2x_3 + 3x_4 = 0 \\
x_1 + 2x_2 + x_3 + 4x_4 = 0 \\
2x_1 + 3x_2 + 2x_3 + 6x_4 = 0 \\
3x_1 + x_2 + 3x_3 + 2x_4 =0
\end{cases}
\Rightarrow
\begin{cases}
x_2-x_3+x_4=0\\
x_2-2x_3=0\\
-2x_2-3x_3-7x_4=0
\end{cases}
\Rightarrow
\begin{cases}
-x_3-x_4=0\\
-5x_3-5x_4=0.
\end{cases}
\]
So we  have $x_1=x_4$, $x_2=-2x_4$, and $x_3=-x_4$, i.e. $x=x_4(1, -2, -1, 1)$, a multiple of $l$ in Exercise 7. Equation (22) has a solution if and only if $u=\left[\begin{matrix}u_1 \\ u_2 \\ u_3 \\ u_4 \end{matrix}\right]$ is in $R_M$. By Theorem 5 of Chapter 3, this is equivalent to $yu=0$, $\forall y \in N_{M'}$ (elements of $N_{M'}$ are seen as row vectors). We have proved $y$ is a multiple of $l$. Hence condition (23), which is just $lu=0$, is sufficient for the solvability of the system (22).
\end{proof}

\section{Determinant and Trace}
The book's own solution gives answers to Ex 1, 2, 3, 4, 5.

\bigskip

$\bigstar$ {\bf Comments}:

\smallskip

1) For a more intuitive proof of Theorem 2 ($\det(BA) = \det A \det B$), see Munkres \cite[page~18]{Munkres97}, Theorem 2.10.

\medskip

2) The following proposition is one version of Cramer's rule and will be used in the proof of Lemma 6, Chapter 6 (formula (21) on page 68).
\begin{prop}\label{prop_extended_cramer}
Let $A$ be an $n\times n$ matrix and $B$ defined as the matrix of {\it cofactors} of $A$; that is,
\[
B_{ij} = (-1)^{i+j} \det A_{ji},
\]
where $A_{ji}$ is the ($ji$)th minor of $A$. Then $AB=BA=\det A \cdot I_{n\times n}$.
\end{prop}
\begin{proof}
Suppose $A$ has the column form $A = (a_1, \cdots, a_n)$. By replacing the $j$th column with the $i$th column in $A$, we obtain
\[
M = (a_1, \cdots, a_{j-1}, a_i, a_{j+1}, \cdots, a_n).
\]
On one hand, Property (i) of a determinant gives $\det M = \delta_{ij} \det A$; on the other hand, Laplace expansion of a determinant gives
\begin{eqnarray*}
\det M = \sum_{k=1}^n (-1)^{k+j} a_{ki} \det A_{kj} = \sum_{k=1}^n a_{ki} B_{jk} = (B_{j1}, \cdots, B_{jn})
\left(
  \begin{array}{c}
    a_{1i} \\
    \vdots \\
    a_{ni} \\
  \end{array}
\right)
.
\end{eqnarray*}
Combined, we can conclude $\det A \cdot I_{n\times n} = BA$. By replacing the $i$th column with the $j$th column in $A$, we can get a similar result for $AB$.
\end{proof}

\bigskip

\noindent $\blacktriangleright$ 1. (page 47) Prove properties (7).

\smallskip

(a)\begin{proof}By formula (5), we have $|P(p(x_1,\cdots,x_n))|=|\prod_{i<j}(x_{p(i)}-x_{p(j)})|=|\prod_{i<j}(x_i-x_j)|=|P(x_1,\cdots,x_n)|$, since the permutation $p$ merely rearranges the factors of the product, up to sign. By formula (6), we have $|P(p(x_1,\cdots,x_n))|=|\sigma(p)||P(x_1,\cdots,x_n)|$. Combined, we conclude $|\sigma(p)|=1$. \end{proof}

(b)\begin{proof} By definition, we have
\[
P(p_1\circ p_2(x_1,\cdots,x_n))=P(p_1(p_2(x_1,\cdots,x_n)))=\sigma(p_1)P(p_2(x_1,\cdots,x_n))=\sigma(p_1)\sigma(p_2)P(x_1,\cdots,x_n).
\]So $\sigma(p_1\circ p_2)=\sigma(p_1)\sigma(p_2)$.
\end{proof}

\noindent $\blacktriangleright$ 2. (page 48) Prove (c) and (d) above.
\begin{proof}To see (c) is true, we suppose $t$ interchange $i_0$ and $j_0$. Without loss of generality, we assume $i_0<j_0$. Then
\begin{eqnarray*}
P(t(x_1,\cdots,x_n))
&=&P(x_1,\cdots,x_{j_0},\cdots,x_{i_0},\cdots,x_n)\\
&=&(x_{j_0}-x_{i_0}) \prod_{i<j, (i,j)\ne (i_0,j_0)}(x_i-x_j)\\
&=& -\prod_{i<j}(x_i-x_j)\\
&=&-P(x_1,\cdots,x_n).
\end{eqnarray*}
So $\sigma(t) = -1$.

To see (d) is true, note formula (9) is equivalent to  $\mbox{id}=t_k\circ \cdots \circ t_1 \circ p^{-1}$.
Acting these operations on $(1,\cdots,n)$, we have
$(1,\cdots,n)=t_k\circ \cdots \circ t_1
(p^{-1}(1),\cdots,p^{-1}(n))$. Then the problem is reduced to
proving that a sequence of transpositions can sort an array of
numbers into ascending order. There are many ways to achieve that.
For example, we can let $t_1$ be the transposition that interchanges
$p^{-1}(1)$ and $p^{-1}(i_0)$, where $i_0$ satisfies
$p^{-1}(i_0)=1$. That is, $t_1$ puts $1$ in the first position of
the sequence. Then we let $t_2$ be the transposition that puts 2 to
the second position. We continue this procedure until we sort out
the whole sequence. This shows sorting can be  accomplished by a
sequence of transpositions.
\end{proof}

\noindent $\blacktriangleright$ 3. (page 48) Show that the decomposition (9) is not unique, but that the parity of the number $k$ of factors is unique.
\begin{proof}For any transposition $t$, we have $t\circ t = \mbox{id}$. So if $p = t_k\circ \cdots \circ t_1 $, we can get another decomposition $p = t_k\circ \cdots \circ t_1 \circ t \circ t$. This shows the decomposition is not unique.

Suppose the permutation $p$ has two different decompositions into transpositions: $p=t_k\circ \cdots \circ t_1 = t'_m\circ\cdots\circ t'_1$. By formula (7), part (b) and formula (8), $\sigma(p)=(-1)^k=(-1)^m$. So $k-m$ is an even number. This shows the parity of the number of factors is unique.\end{proof}

\noindent $\blacktriangleright$ 4. (page 49) Show that $D$ defined by (16) has Properties (ii), (iii) and (iv).
\begin{proof}
To verify Property (ii), note for any index $j$, $\alpha,\beta\in K$, we have
\begin{eqnarray*}
& & D(a_1,\cdots,\alpha a_j'+\beta a_j,\cdots,a_n)\\
&=&\sum \sigma(p)a_{p_11}\cdots(\alpha a_{p_jj}'+\beta a_{p_jj})\cdots a_{p_nn}\\
&=&\sum [\alpha \sigma(p)a_{p_11}\cdots a_{p_jj}'\cdots a_{p_nn} + \beta \sigma(p)a_{p_11}\cdots a_{p_jj}\cdots a_{p_nn}]\\
&=&\alpha D(a_1,\cdots,a_j',\cdots,a_n)+\beta D(a_1,\cdots,a_j,\cdots,a_n).
\end{eqnarray*}

To verify Property (iii), note $e_{p_11}\cdots e_{p_nn}$ is non-zero if and only if $p_i=i$ for any $1\le i\le n$. In this case the product is 1.

To verify Property (iv), note for any $i\ne j$, if we denote by $t$ the transposition that interchanges $i$ and $j$, then $p\mapsto p\circ t$ is a one-to-one and onto map from the set of all permutations to itself. Therefore, we have
\begin{eqnarray*}
& & D(a_1,\cdots,a_i,\cdots,a_j,\cdots,a_n)\\
&=&\sum \sigma(p)a_{p_11}\cdots a_{p_ii}\cdots a_{p_jj}\cdots a_{p_nn}\\
&=&\sum (-1)\sigma(p\circ t)a_{(p\circ t)_11}\cdots a_{(p\circ t)_ji}\cdots a_{(p\circ t)_ij}\cdots a_{(p\circ t)_nn}\\
&=&\sum (-1)\sigma(p\circ t)a_{(p\circ t)_11}\cdots a_{(p\circ t)_ij}\cdots a_{(p\circ t)_ji}\cdots a_{(p\circ t)_nn}\\
&=&(-1)\sum \sigma(q)a_{q_11}\cdots a_{q_ij}\cdots a_{q_ji}\cdots a_{q_nn}\\
&=&-D(a_1,\cdots,a_j,\cdots,a_i,\cdots,a_n).
\end{eqnarray*}
\end{proof}

\noindent $\blacktriangleright$ 5. (page 49) Show that Property (iv) implies Property (i), unless the field $K$ has characteristic two, that is, $1+1=0$.
\begin{proof}By property (iv),
$
D(a_1,\cdots,a_i,\cdots,a_i,\cdots,a_n)=-D(a_1,\cdots,a_i,\cdots,a_i,\cdots,a_n).
$
So, adding $D(a_1,\cdots,a_i,\cdots,a_i,\cdots,a_n)$ to both sides of the equation, we have $2D(a_1,\cdots,a_i,\cdots,a_i,\cdots,a_n)=0$. If the characteristic of the field $K$ is not two, we can conclude $D(a_1,\cdots,a_i,\cdots,a_i,\cdots,a_n)=0$.
\end{proof}
\begin{remark} This exercise and Exercise 5.4 together show formula
(16) is equivalent to Properties (i)-(iii), provided the characteristic
of $K$ is not two. Therefore, for $K=\mathbb R$ or $\mathbb C$, we
can either use (16) or properties (i)-(iii) as the definition of determinant.
\end{remark}

\noindent $\blacktriangleright$ 6. (page 52) Verify that $C(A_{11})$ has properties (i)-(iii).
\begin{proof}
If two column vectors $a_i$ and $a_j$ $(i\ne j)$ of $A_{11}$ are equal, we have $\left[\begin{matrix}0 \\ a_i\end{matrix}\right]=\left[\begin{matrix}0 \\ a_j\end{matrix}\right]$. So $C(A_{11})=0$ and property (i) is satisfied. Since any linear operation on a column vector $a_i$ of $A_{11}$ can be naturally extended to $\left[\begin{matrix}0 \\ a_i\end{matrix}\right]$, property (ii) is also satisfied. Finally, we note when $A_{11}=I_{(n-1)\times(n-1)}$, $\left[\begin{matrix}1 & 0 \\ 0 & A_{11}\end{matrix}\right] = I_{n\times n}$. So property (iii) is satisfied.
\end{proof}

\noindent $\blacktriangleright$ 7. (page 52) Deduce Corollary 5 from Lemma 4.
\begin{proof} We first move the $j$-th column to the position of the first
column. This can be done by interchanging neighboring columns
$(j-1)$ times. The determinant of the resulting matrix $A_1$ is
$(-1)^{j-1}\det A$. Then we move the $i$-th row to the position
of the first row. This can be done by interchanging neighboring rows
$(i-1)$ times. The resulting
matrix $A_2$ has a determinant equal to  $(-1)^{i-1}\det A_1=(-1)^{i+j}\det A$. On
the other hand, $A_2$ has the form of $\left(\begin{matrix}1 & * \\
0 & A_{ij}\end{matrix}\right)$. By Lemma 4, we have
$\det A_{ij}=\det A_2=(-1)^{i+j}\det A$. So
$\det A=(-1)^{i+j}\det A_{ij}$.
\end{proof}
\begin{remark}
Rigorously speaking, we only proved that swapping two neighboring columns will give a minus sign to the determinant (Property (iv)), but we haven't proved this property for neighboring rows. This can be made rigorous by using $\det A = \det A^T$ (Exercise 8 of this chapter).
\end{remark}

\noindent $\blacktriangleright$ 8. (page 54) Show that for any square matrix
\[
\det A^T = \det A, \; A^T = \mbox{transpose of $A$}
\]
[{\it Hint}: Use formula (16) and show that for any permutation $p$, $\sigma(p)=\sigma(p^{-1})$.]
\begin{proof}We first show for any permutation $p$, $\sigma(p)=\sigma(p^{-1})$. Indeed, by formula (7)(b), we have $1=\sigma(id)=\sigma(p\circ p^{-1})=\sigma(p)\sigma(p^{-1})$. By formula (7)(a), we conclude $\sigma(p)=\sigma(p^{-1})$. Second, we denote by $b_{ij}$ the $(i,j)$-th entry of $A^T$. Then $b_{ij}=a_{ji}$. By formula (16) and the fact that $p\mapsto p^{-1}$ is a one-to-one and onto map from the set of all permutations to itself, we have
\begin{eqnarray*}
\det A^T&=&\sum \sigma(p)b_{p_11}\cdots b_{p_nn}\\
 &=& \sum\sigma(p)a_{1p_1}\cdots a_{np_n}\\
 &=& \sum\sigma(p^{-1})a_{(p^{-1}\circ p)_1p_1}\cdots a_{(p^{-1}\circ p)_np_n}\\
 &=& \sum\sigma(p^{-1})a_{p^{-1}(p_1)p_1}\cdots a_{p^{-1}(p_n)p_n}\\
 &=& \sum\sigma(p^{-1})a_{p^{-1}(1)1}\cdots a_{p^{-1}(n)n}\\
 &=& \det A.
\end{eqnarray*}
\end{proof}

\noindent $\blacktriangleright$ 9. (page 54) Given a permutation $p$ of $n$ objects, we define an associated so-called {\it permutation matrix} $P$ as follows:
\[
P_{ij}=
\begin{cases}
1, \;\mbox{if $j=p(i)$,} \\
0, \;\mbox{otherwise.}
\end{cases}
\]
Show that the action of $P$ on any vector $x$ performs the permutation $p$ on the components of $x$. Show that if $p$, $q$ are two permutations and $P$, $Q$ are the associated permutation matrices, then the permutation matrix associated with $p\circ q$ is the product $PQ$.
\begin{proof}By Exercise 2, it suffices to prove the property for transpositions. Suppose $p$ interchanges $i_1,i_2$ and $q$ interchanges $j_1,j_2$. Denote by $P$ and $Q$ the corresponding permutation matrices, respectively. Then for any $x=(x_1,\cdots,x_n)^T \in \mathbb R^n$, we have
($\delta_{ij}$ is the Kronecker delta)
\begin{eqnarray*}
(Px)_i=\sum P_{ij}x_j =\sum \delta_{p(i)j}x_j=\begin{cases}x_{i_2} & \mbox{if $i=i_1$}\\ x_{i_1} & \mbox{if $i=i_2$}\\ x_i & \mbox{otherwise.}\end{cases}
\end{eqnarray*}
This shows the action of $P$ on any column vector $x$ performs the permutation $p$ on the components of $x$. Similarly, we have
\begin{eqnarray*}
(Qx)_i=\begin{cases}x_{j_2} & \mbox{if $i=j_1$}\\ x_{j_1} & \mbox{if $i=j_2$}\\ x_i & \mbox{otherwise.}\end{cases}
\end{eqnarray*}
Since $(PQ)(x)=P(Q(x))$, the action of matrix $PQ$ on $x$ performs first the permutation $q$ and then the permutation $p$ on the components of $x$. Therefore, the permutation matrix associated with $p\circ q$ is the product of $P$ and $Q$.
 \end{proof}

\noindent $\blacktriangleright$ 10. (page 56) Let $A$ be an $m\times n$ matrix, $B$ an $n\times m$ matrix. Show that
\[
\mbox{tr} AB = \mbox{tr} BA
\]

\begin{proof}
\[
\mbox{tr}(AB)=\sum_{i=1}^m(AB)_{ii}=\sum_{i=1}^m\sum_{j=1}^na_{ij}b_{ji}=\sum_{j=1}^m\sum_{i=1}^na_{ji}b_{ij}=\sum_{i=1}^n\sum_{j=1}^mb_{ij}a_{ji}=\sum_{i=1}^n(BA)_{ii}=\mbox{tr}(BA),
\]
where the third equality is obtained by interchanging the names of the indices $i,j$.
\end{proof}

\noindent $\blacktriangleright$ 11. (page 56) Let $A$ be an $n\times n$ matrix, $A^T$ its transpose. Show that
\[
\mbox{tr}AA^T = \sum a_{ij}^2.
\]
The square root of the double sum on the right is called the Euclidean, or Hilbert-Schmidt, norm of the matrix $A$.
\begin{proof}
\[
\mbox{tr}(AA^T)=\sum_i(AA^T)_{ii}=\sum_i\sum_jA_{ij}A^{T}_{ji}=\sum_i\sum_ja_{ij}a_{ij}=\sum_{ij}a_{ij}^2.
\]
\end{proof}

\noindent $\blacktriangleright$ 12. (page 56) Show that the determinant of the $2\times 2$ matrix
\[
\left(
  \begin{array}{cc}
    a & b \\
    c & d \\
  \end{array}
\right)
\]
is $D=ad-bc$.
\begin{proof}
Apply Laplace expansion of a determinant according to its columns (Theorem 6).
 \end{proof}

\noindent $\blacktriangleright$ 13. (page 56) Show that the determinant of an upper triangular matrix, one whose elements are zero below the main diagonal, equals the product of its elements along
the diagonal.
\begin{proof}
Apply Laplace expansion of a determinant according to its columns (Theorem 6) and work by induction.
 \end{proof}

\noindent $\blacktriangleright$ 14. (page 57) How many multiplications does it take to evaluate $\det A$ by using Gaussian elimination to bring it into upper triangular form?
\begin{proof}
Denote by $M(n)$ the number of multiplications needed to evaluate $\det A$ of an $n\times n$ matrix $A$ by using Gaussian elimination to bring it into upper triangular form. To use the first row to eliminate $a_{21}$, $a_{31}$, $\cdots$, $a_{n1}$, we need $n(n-1)$ multiplications. So $M(n)=n(n-1)+M(n-1)$ with $M(1) = 0$. So $M(n)=\sum_{k=1}^nk(k-1)=\frac{n(n+1)(2n+1)}{6}-\frac{n(n+1)}{2}=\frac{(n-1)n(n+1)}{3}$.
\end{proof}

\noindent $\blacktriangleright$ 15. (page 57) How many multiplications does it take to evaluate $\det A$ by formula (16)?
\begin{proof} Denote by $M(n)$ the number of multiplications needed to evaluate the determinant of an $n\times n$ matrix by formula (16). Then $M(n) = nM(n-1)$. So $M(n)=n!$. \end{proof}

\noindent $\blacktriangleright$ 16. (page 57) Show that the determinant of a $(3\times 3)$ matrix
\[
A =
\left(
  \begin{array}{ccc}
    a & b & c \\
    d & e & f \\
    g & h & i \\
  \end{array}
\right)
\]
can be calculated as follows. Copy the first two columns of $A$ as a fourth and fifth column:
\[
\left(
  \begin{array}{ccccc}
    a & b & c & a & b \\
    d & e & f & d & e \\
    g & h & i & g & h \\
  \end{array}
\right)
\]
\[
\det A = aei + bfg + cdh - gec - hfa - idb.
\]
Show that the sum of the products of the three entries along the dexter diagonals, minus the sum of the products of the three entries along the sinister diagonals is equal to the determinant of $A$.
\begin{proof}
We apply Laplace expansion of a determinant according to its columns (Theorem 6):
\begin{eqnarray*}
\det \left[\begin{matrix}a & b & c \\ d & e & f \\ g & h & i \end{matrix}\right] &=& a \det \left[\begin{matrix}e & f \\ h & i \end{matrix}\right] - d \det \left[\begin{matrix}b & c \\ h & i \end{matrix}\right] + g \det \left[\begin{matrix}b & c \\ e & f \end{matrix}\right] \\
&=& a(ie-fh) - d(ib-ch) + g(bf -ce) \\
&=& aei + bfg + cdh - gec - afh - idb.
\end{eqnarray*}
\end{proof}

\section{Spectral Theory}

The book's own solution gives answers to Ex 2, 5, 7, 8, 12.

\bigskip

$\bigstar$ {\bf Comments}:

\smallskip

1) $\lambda \in \mathbb C$ is an eigenvalue of a square matrix $A$ if and only if it is a root of the characteristic polynomial $\det(aI-A) = p_A(a)$ (Corollary 3 of Chapter 5). The {\it spectral mapping theorem} (Theorem 4) extends this result further to polynomials of $A$.

\medskip

2) The proof of Lemma 6 in this chapter (formula (21) on page 68) used Proposition
\ref{prop_extended_cramer} (see the Comments of Chapter 5).

\medskip

3) On p.72, the fact that from a certain index on, $N_d$'s become equal can be seen from the following line of reasoning. Assume $N_{d-1} \ne N_d$ while $N_d = N_{d+1}$. For any $x\in N_{d+2}$, we have $(A-aI)x \in N_{d+1} = N_d$. So $x \in N_{d+1}=N_d$. Then we work by induction.

\medskip

4) Theorem 12 can be enhanced to a statement on necessary and sufficient conditions, which leads to the Jordan canonical form (see Appendix \ref{appendix_Jordan_canonical_form} for details).

\bigskip

$\bigstar$ {\bf Supplementary notes}:

\smallskip

1) Minimal polynomial is defined from the algebraic point of view as the generator of the polynomial ring $\{p: p(A)=0\}$. So the powers of  its linear factors are given algebraically. Meanwhile, the index of an eigenvalue is defined from the geometric point of view. Theorem 11 says they are equal.

\medskip

2) As a corollary of Theorem 11, we claim an $n\times n$ matrix $A$ can be diagonalized over the field $\mathbb F$
if and only if its minimal polynomial can be decomposed into the
product of distinct linear factors (polynomials of degree 1 over the field $\mathbb
F$). Indeed, by the uniqueness of minimal polynomial, we have
\begin{eqnarray*}
&  & \mbox{$m_A$ is the product of distinct linear factors}
\\
&\Longleftrightarrow & \mathbb F^n = \bigoplus_{j=1}^k N_1(a_j) \\
&\Longleftrightarrow & \mbox{$\mathbb F^n$ has a basis $\{x_i\}_{i=1}^n$ consisting of eigenvectors of $A$} \\
&\Longleftrightarrow & \mbox{$A$ can be diagonalized by the matrix $U = (x_1, \cdots, x_n)$, such that }
U^{-1}AU = \mbox{diag} \{\lambda_1, \cdots, \lambda_n\}.
\end{eqnarray*}

The above sequence of equivalences also gives the {\bf steps to diagonalize a matrix $A$}:

i) Compute the characteristic polynomial $p_A(s) = \det (s I - A)$.

ii) Solve the equation $p_A(s)=0$ in $\mathbb F$ to obtain the eigenvalues of $A$: $a_1$, $\cdots$, $a_k$.

iii) For each $a_j$ ($j=1,\cdots, k$), solve the homogenous equation $(a_j I - A)x=0$ to obtain the eigenvectors pertaining to $a_j$: $x_{j1}$, $\cdots$, $x_{j m_j}$, where $m_j = \dim N_1(a_j)$.

iv) If $\sum_{j=1}^k m_j < n$, $A$ cannot be diagonalized in $\mathbb F$. If $\sum_{j=1}^k m_j = n$, $A$ can be diagonalized by the matrix
\[
U = (x_{11}, \cdots, x_{1m_1}, x_{21}, \cdots, x_{2m_2}, \cdots, x_{k1}, \cdots, x_{km_k}),
\]
such that $U^{-1}AU=\mbox{diag} \{\underbrace{a_1, \cdots, a_1}_{\dim N_1(a_1)}, \cdots, \underbrace{a_k, \cdots, a_k}_{\dim N_1(a_k)}\}$.

\medskip

3) Suppose $X$ is a linear subspace of $\mathbb F^n$ that is invariant under the $n\times n$ matrix $A$. If $A$ can be diagonalized, the matrix corresponding to $A|_{X}$ can also be diagonalized. This is due to the observation that $X = \bigoplus_{j=1}^k (N_1(a_j) \cap X)$.

\medskip

4) We summarize several relationships among index, algebraic multiplicity, geometric multiplicity, and the dimension of the space of generalized eigenvectors pertaining to a given eigenvalue. The first result is an elementary proof of Lemma 10, Chapter 9, page 132.

\begin{prop}[{\bf Geometric and algebraic multiplicities}]\label{prop_geometric_algebraic_multiplicities}
Let $A$ be an $n\times n$ matrix over a field $\mathbb F$ and $\alpha$ an
eigenvalue of $A$. If $m(\alpha)$ is the multiplicity of $\alpha$ as a root of
the characteristic polynomial $p_A$ of $A$, then $\dim N_1(\alpha) \le m(\alpha)$.

$m(\alpha)$ is called the {\bf algebraic multiplicity} of $\alpha$; $\dim N_1(\alpha)$ is called the {\bf geometric multiplicity} of $\alpha$ and is the dimension of the linear space spanned by the eigenvectors pertaining to $\alpha$. So this result says
``geometric multiplicity $\dim N_1(\alpha)$ $\le$ algebraic multiplicity $m(\alpha)$".
\end{prop}
\begin{proof}
Let $v_1,\cdots, v_s$ be a basis of $N_1(\alpha)$ and extend it to a
basis of $\mathbb F^n$: $v_1$, $\cdots$, $v_s$, $u_1$, $\cdots$,
$u_r$. Define $U = (v_1,\cdots, v_s, u_1,\cdots, u_r)$. Then
\begin{eqnarray*}
 U^{-1}AU &=& U^{-1}A(v_1,\cdots, v_s,u_1,\cdots, u_r)\\
 & = &
U^{-1}(\alpha v_1,\cdots, \alpha v_s, Au_1,\cdots, Au_r) \\
&=& (\alpha U^{-1}v_1,\cdots, \alpha U^{-1}v_s, U^{-1}Au_1,\cdots, U^{-1}Au_r).
\end{eqnarray*}
Because $U^{-1}U = I$, we must have $U^{-1}AU = \left[\begin{matrix} \alpha I_{s\times
s} & B \\ 0 & C \end{matrix}\right]$ and $\det(\lambda I - A) = \det
(\lambda I - U^{-1}AU) = \det \left[\begin{matrix} (\lambda - \alpha) I_{s\times
s} & -B \\ 0 & \lambda I_{(n-s)\times (n-s)} - C \end{matrix}\right] = (\lambda - \alpha)^s \det(\lambda I -
C)$.\footnote{For the last equality, see, for example, Munkres
\cite[page~24]{Munkres97}, Problem 6, or 蓝以中\cite[page~173]{Lan02a}.} So $s\le m(\alpha)$.
\end{proof}

We continue to use the notation from Proposition \ref{prop_geometric_algebraic_multiplicities}, and we define $d(\alpha)$ as the index of $\alpha$. Then we have
\begin{prop}[{\bf Index and algebraic multiplicity}]\label{prop_index_algebraic_multiplicity}
$d(\alpha) \le m(\alpha)$.
\end{prop}
\begin{proof}
Let $q(s)=p_A(s)/(s-\alpha)^m$. Then $\mathbb C^n = N_{p_A}=N_q\bigoplus
N_m(\alpha)$. For any $v\in N_{m+1}(\alpha)$, $v$ can be uniquely written as
$v=v'+v''$ with $v'\in N_q$ and $v''\in N_m(\alpha)$. Then $v'=v-v'' \in
N_{m+1}(\alpha)\cap N_q$. Similar to the second part of the proof of
Lemma 9, we can show $v'=0$. So $v=v'' \in N_m(\alpha)$. This shows
$N_{m+1}(\alpha) = N_m(\alpha)$ and hence $d(\alpha) \le m(\alpha)$.
\end{proof}

Using the notations from Propositions \ref{prop_geometric_algebraic_multiplicities} and \ref{prop_index_algebraic_multiplicity}, we have
\begin{prop}[{\bf Algebraic multiplicity and the dimension of the space of generalized eigenvectors}]
$m(\alpha)=\dim N_{d(\alpha)}(\alpha)$.
\end{prop}
\begin{proof}See Theorem 11 of Chapter 9, page 133.\end{proof}

In summary, we have
\[
\boxed{ \dim N_1(\alpha), d(\alpha) \le m(\alpha) = \dim N_{d(\alpha)}(\alpha). }
\]
In words, it becomes
\begin{eqnarray*}
& & \mbox{geometric multiplicity of $\alpha$, index of $\alpha$} \\
&\le& \mbox{algebraic multiplicity of $\alpha$ as a root of the characteristic polynomial}\\
& = & \mbox{dim. of the space of
generalized eigenvectors pertaining to $\alpha$}.
\end{eqnarray*}


\bigskip

\noindent $\blacktriangleright$ 1. (page 63) Calculate $f_{32}$.
\begin{proof} $f_{32}=(a_1^{32}-a_2^{32})/\sqrt{5}= 2178309$. \end{proof}

\noindent $\blacktriangleright$ 2. (page 65) (a) Prove that if $A$ has $n$ distinct eigenvalues $a_j$ and all of them are less than one in absolute value, then for all $h$ in $\mathbb C^n$
\[
A^Nh \to 0, \; \mbox{as $N \to \infty$},
\]
that is, all components of $A^Nh$ tend to zero.

(b) Prove that if all $a_j$ are greater than one in absolute value, then for all $h\ne 0$,
\[
A^Nh \to \infty, \; \mbox{as $N \to \infty$},
\]
that is, some components of $A^Nh$ tend to infinity.

\smallskip
(a)
\begin{proof} Denote by $h_j$ the eigenvector corresponding to the eigenvalue $a_j$. For any $h\in \mathbb C^n$, there exist $\theta_1,\cdots,\theta_n \in \mathbb C$ such that $h=\sum_j\theta_j h_j$. So $A^Nh=\sum_j\theta_j a_j^Nh_j$. Define $b=\max\{|a_1|,\cdots,|a_n|\}$. Then for any $1\le k \le n$, $|(A^Nh)_k|=|\sum_j\theta_ja_j^N(h_j)_k| \le b^N\sum_j|\theta_j||(h_j)_k|\to 0$, as $N\to\infty$, since $0\le b<1$. This shows $A^Nh\to 0$ as $N\to\infty$.
\end{proof}

(b) \begin{proof} We use the same notation as in part (a). Since $h\ne 0$, there exists some $k_0 \in \{1, \cdots, n\}$ so that the $k_0$th coordinate of $h$ satisfies $h_{k_0}=\sum_j\theta_{j}(h_{j})_{k_0}\ne 0$. Then $|(A^Nh)_{k_0}|=|\sum_j\theta_ja_j^N(h_j)_{k_0}|$. Define $b_1=\max_{1\le i \le n}\{|a_i|:\theta_i\ne 0, (h_{i})_{k_0}\ne 0 \}$. Then $b_1>1$ and $|(A^Nh)_{k_0}| = |b_1|^N\left|\sum_{i=1}^n\theta_i\frac{a_i^N}{b_1^N}(h_i)_{k_0}\right|\to \infty$ as $N\to \infty$.
\end{proof}

\noindent $\blacktriangleright$ 3. (page 66) Verify for the matrices discussed in Examples 1 and 2,
\[
\left(
  \begin{array}{cc}
    3 & 2 \\
    1 & 4 \\
  \end{array}
\right)
\;\mbox{and}\; \left(
                 \begin{array}{cc}
                   0 & 1 \\
                   1 & 1 \\
                 \end{array}
               \right),
\]
that the sum of the eigenvalues equals the trace, and their product is the determinant of the matrix.
\begin{proof} The verification is straightforward. \end{proof}

\noindent $\blacktriangleright$ 4. (page 69) Verify (25) by induction on $N$.
\begin{proof}
Formula (24) gives us $Af = af+h$, which is formula (25) when $N=1$. Suppose (25) holds for any $n\le N$, then $A^{N+1}f=A(A^Nf)=A(a^Nf+Na^{N-1}h)=a^NAf+Na^{N-1}Ah=a^N(af+h)+Na^{N-1}ah=a^{N+1}f+(N+1)a^{N}h$. So (25) also holds for $N+1$. By induction, (25) holds for any $N \in \mathbb N$.
\end{proof}

\noindent $\blacktriangleright$ 5. (page 69) Prove that for any polynomial $q$,
\[
q(A)f = q(a)f + q'(a)h,
\]
where $q'$ is the derivative of $q$ and $f$ satisfies (22).
\begin{proof}
Suppose $q(s)=\sum_{i=0}^nb_is^i$, then by formula (25), $q(A)f=\sum_{i=0}^nb_iA^if=\sum_{i=0}^nb_i(a^if+ia^{i-1}h)=(\sum_{i=0}^nb_ia^i)f+(\sum_{i=1}^nib_ia^{i-1})h=q(a)f+q'(a)h$.
\end{proof}

\noindent $\blacktriangleright$ 6. (page 71) Prove (32) by induction on $k$.
\begin{proof}
By Lemma 9, $N_{p_1\cdots p_k}=N_{p_1}\oplus N_{p_2\cdots p_k} = N_{p_1} \oplus (N_{p_2} \oplus N_{p_3\cdots p_k})=N_{p_1} \oplus N_{p_2} \oplus N_{p_3\cdots p_k}=\cdots=N_{p_1}\oplus N_{p_2} \oplus \cdots \oplus N_{p_k}$.
\end{proof}

\noindent $\blacktriangleright$ 7. (page 73) Show that $A$ maps $N_d$ into itself.
\begin{proof} For any $x\in N_d(a)$, we have $(A-aI)^d(Ax)=(A-aI)^{d+1}x+a(A-aI)^dx=0$. So $Ax \in N_d(a)$.
\end{proof}

\noindent $\blacktriangleright$ 8. (page 73) Prove Theorem 11.
\begin{proof}
A number is an eigenvalue of $A$ if and only if it's a root of the characteristic polynomial $p_A$. So $p_A(s)$ can be
written as $p_A(s)=\prod_{1}^k(s-a_i)^{m_i}$ with each $m_i$ a positive integer $(i=1,\cdots,k)$. We have shown in the text that $p_A$ is a multiple of $m_A$, so we can assume $m_A(s)=\prod_{i=1}^k(s-a_i)^{r_i}$ with each $r_i$ satisfying $0\le r_i\le m_i$ ($i=1,\cdots,k$). We argue $r_i=d_i$
for any $1\le i \le k$.

Indeed, we have
\[
\mathbb C^n = N_{p_A} = \bigoplus_{j=1}^k N_{m_j}(a_j) = \bigoplus_{j=1}^k N_{d_j}(a_j).
\]
where the last equality comes from the observation $N_{m_j}(a_j) \subseteq
N_{m_j + d_j}(a_j) = N_{d_j}(a_j)$ by the definition of $d_j$. This shows the polynomial $\prod_{j=1}^k(s-a_j)^{d_j} \in \wp := \{\mbox{polynomials } p: p(A)=0\}$. By the definition of minimal polynomial, $r_j \le d_j$ for $j=1,\cdots, k$.

 Assume for some $j$, $r_j < d_j$, we can then find $x \in N_{d_j}(a_j)\setminus N_{r_j}(a_j) $ with $x\ne 0$. Define $q(s)=\prod_{i=1,i\ne j}^k(s-a_i)^{r_i}$, then by Corollary 10 $x$ can be uniquely decomposed into $x'+x''$ with $x' \in N_{q}$ and $x'' \in N_{r_j}(a_j)$. We have $0=(A-a_jI)^{d_j}x=(A-a_jI)^{d_j}x'+0$. So $x'\in N_q\cap N_{d_j}(a_j)=\{0\}$. This implies $x=x'' \in N_{r_j}(a_j)$. Contradiction. Therefore, $r_i \ge d_i$ for any $1\le i \le k$.

Combined, we conclude $m_A(s) = \prod_{i=1}^k(s-a_i)^{d_i}$.
\end{proof}
\begin{remark}
Along the way, we have shown that the index $d$ of an eigenvalue is no greater than the algebraic multiplicity of the eigenvalue in the characteristic polynomial. Also see Proposition \ref{prop_index_algebraic_multiplicity}.
\end{remark}

\noindent $\blacktriangleright$ 9. (page 75) Prove Corollary 15.
\begin{proof}The extension is straightforward as the key feature of the proof, ``$B$ maps $N^{(j)}$ into $N^{(j)}$", remains the same regardless of the number of linear maps, as far as they commute pairwise. \end{proof}

\noindent $\blacktriangleright$ 10. (page 76) Prove Theorem 18.
\begin{proof} For any $i\in\{1,\cdots,n\}$, by
Theorem 17, $(l_i,x_j)=0$ for any $j\ne i$. Since
$x_1$, $\cdots$, $x_n$ span the whole space and
$l_i\ne 0$, we must have $(l_i,x_i)\ne 0$, $i=1,\cdots,n$. This proves (a) of Theorem 18. For (b), we note if $ x = \sum_{j=1}^n
k_jx_j$, then $(l_i,x) = k_i(l_i,x_i)$. So
$k_i=(l_i, x)/(l_i,x_i)$.
\end{proof}

\noindent $\blacktriangleright$ 11. (page 76) Take the matrix
\[
\left(
  \begin{array}{cc}
    0 & 1 \\
    1 & 1 \\
  \end{array}
\right)
\]
from equation $(10)'$ of Example 2.

(a) Determine the eigenvector of its transpose.

(b) Use formulas (44) and (45) to determine the expansion of the vector (0,1)' in terms of the eigenvectors of the original matrix. Show that your answer agrees with the expansion obtained in Example 2.

\smallskip

(a)
\begin{proof}
The matrix is symmetric, so it's equal to its transpose and the eigenvectors are the same: for eigenvalue $a_1=\frac{1+\sqrt{5}}{2}$, the eigenvector is $h_1= \left[\begin{matrix}1 \\ a_1 \end{matrix}\right]$; for eigenvalue $a_2=\frac{1-\sqrt{5}}{2}$, the eigenvector is $h_2=\left[\begin{matrix}1 \\ a_2 \end{matrix}\right]$.
\end{proof}
(b)
\begin{proof}
We note $(h_1, h_1) = 1+a_1^2 = \frac{5+\sqrt{5}}{2}$ and $(h_2,h_2)=1+a_2^2 = \frac{5-\sqrt{5}}{2}$. For $x=\left[\begin{matrix} 0 \\ 1 \end{matrix} \right]$, we have $(h_1,x)=a_1$ and $(h_2,x)=a_2$. So using formula (44) and (45), $x=c_1h_1+c_2h_2$ with
\[
c_1=a_1/\frac{5+\sqrt{5}}{2} = 1/\sqrt{5},\; c_2 = a_2 / \frac{5-\sqrt{5}}{2} = -1/\sqrt{5}.
\]
This agrees with the expansion obtained in Example 2.
\end{proof}

\noindent $\blacktriangleright$ 12. (page 76) In Example 1 we have determined the eigenvalues and corresponding eigenvector of the matrix
\[
\left(
  \begin{array}{cc}
    3 & 2 \\
    1 & 4 \\
  \end{array}
\right)
\]
as $a_1 = 2$, $h_1 = \left(
  \begin{array}{cc}
    2 \\
    -1\\
  \end{array}
\right)$, and $a_2 = 5$, $h_2 = \left(
  \begin{array}{cc}
    1 \\
    1 \\
  \end{array}
\right)$.

Determine eigenvectors $l_1$ and $l_2$ of its transpose and show that
\[
(l_i, h_j) = \begin{cases}
0 & \mbox{for $i\ne j$} \\
\ne 0 & \mbox{for $i=j$}
\end{cases}
\]
\begin{proof}
The transpose of the matrix has the same eigenvalues $a_1=2$, $a_2=5$. Solving the equation $\left[\begin{matrix}3 & 1 \\ 2 & 4 \end{matrix}\right]\left[\begin{matrix}x\\y\end{matrix}\right]=2\left[\begin{matrix}x\\y\end{matrix}\right]$, we have $l_1=\left[\begin{matrix}1 & -1 \end{matrix}\right]$. Solving the equation $\left[\begin{matrix}3 & 1 \\ 2 & 4 \end{matrix}\right]\left[\begin{matrix}x\\y\end{matrix}\right]=5\left[\begin{matrix}x\\y\end{matrix}\right]$, we have $l_2=\left[\begin{matrix}1 & 2 \end{matrix}\right]$. Then it's easy to calculate $(l_1,h_1)=3$, $(l_1,h_2)=0$, $(l_2,h_1)=0$, and $(l_2,h_2)=3$.
\end{proof}

\noindent $\blacktriangleright$ 13. (page 76) Show that the matrix
\[
A = \left(
  \begin{array}{ccc}
    0 & 1 & 1 \\
    1 & 0 & 1 \\
    1 & 1 & 0 \\
  \end{array}
\right)
\]
has -1 as an eigenvalue. What are the other two eigenvalues?
\begin{solution}
\begin{eqnarray*}
\det (\lambda I -A) &=& \det \left[\begin{matrix}\lambda & -1 & -1 \\ -1 & \lambda & -1 \\ -1 & -1 & \lambda \end{matrix}\right]
=\det \left[
\begin{matrix}
0 & -1-\lambda & -1+\lambda^2 \\
0 & \lambda + 1 & -1-\lambda \\
-1 & -1 & \lambda
\end{matrix}
\right] \\
&=& -[(\lambda+1)^2-(\lambda^2-1)(\lambda+1)] = (\lambda+1)^2(\lambda-2).
\end{eqnarray*}
So the eigenvalues of $A$ are $-1$ and $2$, and the eigenvalue $-1$ has a multiplicity of $2$.
\end{solution}

\section{Euclidean Structure}

The book's own solution gives answers to Ex 1, 2, 3, 5, 6, 7, 8, 14, 17, 19, 20.

\bigskip

$\bigstar$ {\bf Erratum}: In the {\it Note} on page 92, the infinite-dimensional version of Theorem 15 is Theorem 5 in Chapter 15, not ``Theorem 15".

\bigskip

\noindent $\blacktriangleright$ 1. (page 80) Prove Theorem 2.
\begin{proof}
By letting $y=\frac{x}{|\!|x|\!|}$, we get $|\!|x|\!|\le \max_{|\!|y|\!|=1}(x,y)$. By Schwarz Inequality,
$\max_{|\!|y|\!|=1}(x,y)\le |\!|x|\!|$. Combined, we must have $|\!|x|\!|=\max_{|\!|y|\!|=1}(x,y)$.
\end{proof}

\noindent $\blacktriangleright$ 2. (page 87) Prove Theorem 11.
\begin{proof}
$\forall x,y$ and suppose their decompositions are $x_1+x_2$,
$y_1+y_2$, respectively. Here $x_1,y_1\in Y$ and $x_2,y_2\in
Y^{\perp}$. Then
$(P_Y^*y,x)=(y, P_Yx)=(y_1+y_2,x_1)=(y_1,x_1)=(y_1,x)=(P_Yy,x)$.
By the arbitrariness of $x$ and $y$, $P_Y=P_Y^*$.
\end{proof}

\noindent $\blacktriangleright$ 3. (page 89) Construct the matrix representing reflection of points in $\mathbb R^3$ across the plane $x_3=0$. Show that the determinant of this matrix is $-1$.
\begin{proof}
Under the reflection across the plane $\{(x_1,x_2,x_3): x_3=0\}$,
point $(x_1,x_2,x_3)$ will be mapped to $(x_1,x_2,-x_3)$. So the
corresponding matrix is
$\left(\begin{matrix}
1 & 0 & 0 \\
0 & 1 & 0 \\
0 & 0 & -1 \\
\end{matrix}\right)$, whose determinant is $-1$.
\end{proof}

\noindent $\blacktriangleright$ 4. (page 89) Let $R$ be reflection across any plane in $\mathbb R^3$.

(i) Show that $R$ is an isometry.

(ii) Show that $R^2 = I$.

(iii) Show that $R^* = R$.
\begin{proof}
Suppose the plane $L$ is determined by the equation $Ax+By+Cz=D$. For any point $x=(x_1,x_2,x_3)'\in \mathbb R^3$, we first find $y=(y_1,y_2,y_3)'\in L$ such that the line segment $xy \perp L$. Then $y$ must satisfy the following equations
\[
\begin{cases}
Ay_1+By_2+Cy_3=D\\
(y_1-x_1,y_2-x_2,y_3-x_3)=k(A,B,C)
\end{cases}
\]
where $k$ is some constant. Solving the equations gives us $k = \frac{D-(Ax_1+Bx_2+Cx_3)}{A^2+B^2+C^2}$ and
\[
\left[\begin{matrix} y_1 \\ y_2 \\ y_3\end{matrix} \right] = \left[\begin{matrix}x_1\\x_2\\x_3\end{matrix}\right]+k\left[\begin{matrix}A \\ B \\ C\end{matrix}\right] = \left[\begin{matrix}x_1\\x_2\\x_3\end{matrix}\right] -\frac{1}{A^2+B^2+C^2}\left[\begin{matrix}A^2 & AB & AC\\
AB & B^2 & BC \\ CA & CB & C^2 \end{matrix}\right]\left[\begin{matrix}x_1\\x_2\\x_3\end{matrix}\right]+ \frac{D}{A^2+B^2+C^2}\left[\begin{matrix}A \\ B\\ C\end{matrix}\right]
\]
So the symmetric point $z=(z_1,z_2,z_3)'$ of $x$ with respect to $L$ is given by
\begin{eqnarray*}
\left[\begin{matrix}z_1\\z_2\\z_3\end{matrix}\right] &=& 2\left[\begin{matrix}y_1\\y_2\\y_3\end{matrix}\right] - \left[\begin{matrix}x_1\\x_2\\x_3\end{matrix}\right] = \left[\begin{matrix}x_1\\x_2\\x_3\end{matrix}\right] -\frac{2}{A^2+B^2+C^2}\left[\begin{matrix}A^2 & AB & AC\\
AB & B^2 & BC \\ CA & CB & C^2 \end{matrix}\right]\left[\begin{matrix}x_1\\x_2\\x_3\end{matrix}\right]+ \frac{2D}{A^2+B^2+C^2}\left[\begin{matrix}A \\ B\\ C\end{matrix}\right] \\
&=& \frac{1}{A^2+B^2+C^2} \left[\begin{matrix}-A^2+B^2+C^2 & -2AB & -2AC \\ -2AB & A^2-B^2+C^2 & -2BC \\ -2CA & -2CB & A^2+B^2-C^2\end{matrix}\right]\left[\begin{matrix}x_1\\x_2\\x_3\end{matrix}\right] + \frac{2D}{A^2+B^2+C^2}\left[\begin{matrix}A \\ B\\ C\end{matrix}\right].
\end{eqnarray*}
To make the reflection $R$ a linear mapping, it's necessary and sufficient that $D=0$. So the problem's statement should be corrected to ``let $R$ be reflection across any plane in $\mathbb R^3$ that contains the origin". Then
\[
R= \frac{1}{A^2+B^2+C^2} \left[\begin{matrix}-A^2+B^2+C^2 & -2AB & -2AC \\ -2AB & A^2-B^2+C^2 & -2BC \\ -2CA & -2CB & A^2+B^2-C^2\end{matrix}\right].
\]
$R$ is symmetric, so $R^*=R$ and by plain calculation, we have $R^*R=R^2=I$. By Theorem 12, $R$ is an isometry.
\end{proof}

\noindent $\blacktriangleright$ 5. (page 89) Show that a matrix $M$ is orthogonal iff its rows are pairwise orthogonal unit vectors.
\begin{proof} Suppose $M$ is an $n\times n$
orthogonal matrix. Let $r_1, \cdots, r_n$ be its row vectors.
Then
\begin{eqnarray*}
I = MM^T = \left[\begin{matrix}r_1\\
\cdots \\ r_n\end{matrix}\right] [r_1^T,\cdots, r_n^T] =
\left[
\begin{matrix}
r_1r_1^T & r_1r_2^T & \cdots & r_1r_n^T \\
\cdots &   \cdots   &  \cdots & \cdots \\
r_nr_1^T & r_nr_2^T & \cdots & r_nr_n^T
\end{matrix}
\right].
\end{eqnarray*}
So $M$ is orthogonal if and only if  $r_ir_j^T=\delta_{ij}$ ($1\le i, j \le n$).
\end{proof}

\noindent $\blacktriangleright$ 6. (page 90) Show that $|a_{ij}| \le |\!| A |\!|$.
\begin{proof}
Note $|a_{ij}|=\mbox{sign}(a_{ij})\cdot e_i^T A e_j$, where $e_k$ is the column vector that has 1 as the $k$-th entry and 0 elsewhere. Then we apply (ii) of Theorem 13.
\end{proof}

\noindent $\blacktriangleright$ 7. (page 94) Show that $\{ A_n \}$ converges to $A$ iff for all $x$, $A_n x$ converges to $Ax$.
\begin{proof} The key is the space $X$ being finite dimensional. See the solution in the textbook. \end{proof}

\noindent $\blacktriangleright$ 8. (page 95) Prove the Schwarz inequality for complex linear spaces with a Euclidean structure.
\begin{proof} For any $x,y \in X$ and $a\in \mathbb
C$, $0\le
|\!|x-ay|\!|^2=|\!|x|\!|^2-2\mbox{Re}(x,ay)+|a|^2|\!|y|\!|^2$. Let $a
=\frac{(x,y)}{|\!|y|\!|^2}$ (assume $y\ne 0$), then we have
\[
0\le |\!|x|\!|^2 -
2\mbox{Re}\left\{\frac{\overline{(x,y)}}{|\!|y|\!|^2}(x,y)\right\} +
\frac{|(x,y)|^2}{|\!|y|\!|^2},
\]which gives after simplification $|(x,y)|\le |\!|x|\!||\!|y|\!|$.
\end{proof}

\noindent $\blacktriangleright$ 9. (page 95) Prove the complex analogues of Theorem 6, 7, and 8.
\begin{proof}Proofs are the same
as the ones for the real Euclidean space.
\end{proof}

\noindent $\blacktriangleright$ 10. (page 95) Prove the complex analogue of Theorem 9.
\begin{proof}
Proof is the same as the one for real Euclidean space.
\end{proof}

\noindent $\blacktriangleright$ 11. (page 96) Show that a unitary map $M$ satisfies the relations
\[
M^*M=I
\]
and, conversely, that every map $M$ that satisfies (45) is unitary.
\begin{proof}
If $M$ is a unitary map, then by the polarization identity, $M$ preserves
inner product. So $\forall x,y$, $(x, M^*My)=(Mx,My)=(x,y)$. Since
$x$ is arbitrary, $M^*My=y$, $\forall y \in X$. So $M^*M=I$.
Conversely, if $M^*M=I$, $(x,x)=(x,M^*Mx)=(Mx,Mx)$. So $M$ is an
isometry.
\end{proof}

\noindent $\blacktriangleright$ 12. (page 96) Show that if $M$ is unitary, so is $M^{-1}$ and $M^*$.
\begin{proof} $(M^{-1}x,M^{-1}x)=(M(M^{-1}x),
M(M^{-1}x))=(x,x)$. $(Mx,Mx)=(x,x)=(M^*Mx,M^*Mx)$. By $R_M=X$,
$(y,y)=(M^*y, M^*y)$, $\forall y \in X$. So $M^{-1}$ and $M^*$ are
both unitary.
\end{proof}

\noindent $\blacktriangleright$ 13. (page 96) Show that the unitary maps form a group under multiplication.
\begin{proof} If $M$, $N$ are two unitary maps, then $(MN)^*(MN)=N^*M^*MN=N^*N=I$. So the set of unitary maps
is closed under multiplication. Exercise 12 shows that each unitary map has a unitary inverse. So the set of unitary maps is
a group under multiplication.\end{proof}

\noindent $\blacktriangleright$ 14. (page 96) Show that for a unitary map $M$, $|\det M|=1$.
\begin{proof}By Exercise 8 of Chapter 5, $\det
M^*=\det \overline{M}^T=\overline{\det M}$. So by $M^*M=I$, we have
\[
1= \det M^* \det M = |\det M|^2,
\]
i.e. $|\det M| =1$.\end{proof}

\noindent $\blacktriangleright$ 15. (page 96) Let $X$ be the space of continuous complex-valued functions on $[-1, 1]$ and define the scalar product in $X$ by
\[
(f, g) = \int_{-1}^1 f(s) \bar g(s) ds.
\]
Let $m(s)$ be a continuous function of absolute value $1$: $|m(s)|=1$, $-1 \le s \le 1$.

Define $M$ to be multiplication by $m$:
\[
(Mf)(s) = m(s)f(s).
\]
Show that $M$ is unitary.
\begin{proof}$(Mf,
Mf)=\int_{-1}^1Mf(s)\overline{Mf(s)}ds = \int_{-1}^1m(s)f(s)\bar
m(s) \bar f(s)ds = \int_{-1}^1|m(s)|^2|f(s)|^2ds=(f,f)$. This shows $M$ is unitary. \end{proof}

\noindent $\blacktriangleright$ 16. (page 98) Prove the following analogue of (51) for matrices with complex entries:
\[
|\!| A |\!| \le \left(\sum_{i,j} |a_{ij}|^2 \right)^{1/2}.
\]
\begin{proof} The proof is very similar to that of real case, so we omit the details. Note we need the complex version of Schwarz inequality (Exercise 8). \end{proof}

\noindent $\blacktriangleright$ 17. (page 98) Show that
\[
\sum_{i,j}|a_{ij}|^2 = \mbox{tr} AA^*.
\]
\begin{proof}
We have
\[
(AA^*)_{ij} = [a_{i1}, \cdots, a_{in}]\left[\begin{matrix}\bar a_{j1} \\ \cdots \\ \bar a_{jn}\end{matrix}\right] = \sum_{k=1}^na_{ik}\bar a_{jk}.
\]
So $(AA^*)_{ii}=\sum_{k=1}^n|a_{ik}|^2$ and $\mbox{tr}(AA^*)=\sum_{i,j}|a_{ij}|^2$.
\end{proof}

\noindent $\blacktriangleright$ 18. (page 99) Show that
\[
\mbox{tr} AA^* = \mbox{tr} A^*A.
\]
\begin{proof} This is straightforward from the result of Exercise 17. \end{proof}

\noindent $\blacktriangleright$ 19. (page 99) Find an upper bound and a lower bound for the norm of the $2\times 2$ matrix
\[
A = \left(
      \begin{array}{cc}
        1 & 2 \\
        0 & 3 \\
      \end{array}
    \right)
\]

The quantity $\left(\sum_{i,j} |a_{ij}|^2 \right)^{1/2}$ is called the {\it Hilbert-Schmidt norm} of the matrix $A$.
\begin{solution}
Suppose $\lambda_1$ and $\lambda_2$ are two eigenvalues of $A$. Then by Theorem 3 of Chapter 6, $\lambda_1+\lambda_2 = \mbox{tr} A = 4$ and $\lambda_1\lambda_2=\det A = 3$. Solving the equations gives us $\lambda_1=1$, $\lambda_2=3$. By formula (46), $|\!|A|\!| \ge 3$. According to formula (51), we have $|\!|A|\!| \le \sqrt{1^2+2^2+3^2}=\sqrt{14}$. Combined, we have $3\le |\!|A|\!| \le \sqrt{14}\approx 3.7417$.
\end{solution}

\noindent $\blacktriangleright$ 20. (page 99) (i) $w$ is a {\it bilinear} function of $x$ and $y$. Therefore we write $w$ as a {\it product} of $x$ and $y$, denoted as
\[
w = x \times y,
\]
and called the {\it cross product}.

(ii) Show that the cross product is antisymmetric:
\[
y \times x = - x \times y.
\]

(iii) Show that $x \times y$ is orthogonal to both $x$ and $y$.

(iv) Let $R$ be a rotation in $\mathbb R^3$; show that
\[
(Rx) \times (Ry) = R(x\times y).
\]

(v) Show that
\[
|\!| x \times y |\!| = \pm |\!| x |\!| |\!| y |\!| \sin \theta,
\]
where $\theta$ is the angle between $x$ and $y$.

(vi) Show that
\[
\left(
  \begin{array}{c}
    1 \\
    0 \\
    0 \\
  \end{array}
\right)
\times
\left(
  \begin{array}{c}
    0 \\
    1 \\
    0 \\
  \end{array}
\right)
=
\left(
  \begin{array}{c}
    0 \\
    0 \\
    1 \\
  \end{array}
\right).
\]

(vii) Using Exercise 16 in Chapter 5, show that
\[
\left(
  \begin{array}{c}
    a \\
    b \\
    c \\
  \end{array}
\right)
\times
\left(
  \begin{array}{c}
    d \\
    e \\
    f \\
  \end{array}
\right)
=
\left(
  \begin{array}{c}
    bf - ce \\
    cd - af \\
    ae - bd \\
  \end{array}
\right).
\]
\smallskip

(i)
\begin{proof}For any $\alpha_1$, $\alpha_2 \in \mathbb F$, we have
\begin{eqnarray*}
(w(\alpha_1 x_1+\alpha_2 x_2,y),z) &=& \det(\alpha_1 x_1+\alpha_2 x_2,y,z)= \alpha_1 \det(x_1,y,z)+\alpha_2\det (x_2,y,z) \\
&=& \alpha_1 (w(x_1,y),z)+\alpha_2(w(x_2,y),z) = (\alpha_1w(x_1,y)+\alpha_2w(x_2,y),z).
\end{eqnarray*}
Since $z$ is arbitrary, we necessarily have $w(\alpha_1 x_1 + \alpha_2 x_2, y) = \alpha_1w(x_1,y)+\alpha_2w(x_2,y)$. Similarly, we can prove $w(x,\alpha_1 y_1 + \alpha_2 y_2) = \alpha_1w(x,y_1)+\alpha_2w(x,y_2)$. Combined, we have proved $w$ is a bilinear function of $x$ and $y$.
\end{proof}

(ii) \begin{proof}
We note
\[
(w(x,y),z)=\det (x,y,z) = -\det (y,x,z)=-(w(y,x),z)=(-w(y,x),z).
\]
By the arbitrariness of $z$, we conclude $w(x,y)=-w(y,x)$, i.e. $y\times x = -x\times y$.
\end{proof}

(iii) \begin{proof}
Since $(w(x,y),x) = \det (x,y, x) =0$ and $(w(x,y),y) = \det (x,y, y) = 0$, $x\times y$ is orthogonal to both $x$ and $y$.
\end{proof}

(iv)\begin{proof} We suppose every vector is in column form and $R$ is the matrix that represents a rotation. Then
\[
(Rx \times Ry, z) = \det (Rx, Ry, z) = (\det R)\cdot \det(x,y,R^{-1}z)
\]
and
\[
(R(x\times y),z)=(R(x\times y))^Tz = (x\times y)^TR^Tz = (x\times y, R^Tz) = \det(x,y,R^Tz).
\]
A rotation is an isometry that preserves orientation, so $R^T=R^{-1}$ and $\det R= 1$. Combining the above two equations gives us $(Rx \times Ry, z)=(R(x\times y),z)$. Since $z$ is arbitrary, we must have $(Rx)\times (Ry) = R(x\times y)$.
\end{proof}

(v)\begin{proof} In the equation $\det(x,y,z)=(x\times y, z)$, we set $z=x\times y$. Since the geometrical meaning of $\det (x,y,z)$ is the signed volume of a parallelepiped determined by $x$, $y$, $z$, and since $z=x\times y$ is perpendicular to $x$ and $y$, we have $\det(x,y,z)= \pm |\!|x|\!||\!|y|\!|\sin\theta |\!|z|\!|$, where $\theta$ is the angle between $x$ and $y$. Then by $(x\times y, z)=|\!|z|\!|^2$, we conclude $|\!|x\times y |\!| = |\!|z|\!|=\pm |\!|x|\!||\!|y|\!|\sin\theta$. \end{proof}

(vi) \begin{proof}
\[
1= \det \left[\begin{matrix}1 & 0 & 0 \\ 0 & 1 & 0 \\ 0 & 0 & 1\end{matrix}\right]= (\left[\begin{matrix}1 \\ 0 \\ 0\end{matrix}\right]\times \left[\begin{matrix}0 \\ 1 \\ 0\end{matrix}\right],\left[\begin{matrix}0 \\ 0 \\ 1\end{matrix}\right]).
\]
So $\left[\begin{matrix}1 \\ 0 \\ 0\end{matrix}\right]\times \left[\begin{matrix}0 \\ 1 \\ 0\end{matrix}\right]=\left[\begin{matrix}a \\ b \\ 1\end{matrix}\right]$. By part (iii), we necessarily have $a=b=0$. Therefore, we can conclude $\left[\begin{matrix}1 \\ 0 \\ 0\end{matrix}\right]\times \left[\begin{matrix}0 \\ 1 \\ 0\end{matrix}\right]=\left[\begin{matrix}0 \\ 0 \\ 1\end{matrix}\right]$.
\end{proof}

(vii)\begin{proof} By Exercise 16 of Chapter 5,
\begin{eqnarray*}
\det \left[\begin{matrix}a & d & g \\ b & e & h \\ c & f & i\end{matrix}\right]&=&\det \left[\begin{matrix}a & b & c \\ d & e & f \\ g & h & i\end{matrix}\right] \\
&=& aei+bfg+cdh-gec-hfa-idb\\
&=& (bf-ec)g + (cd-fa)h + (ae-db)i \\
&=& \left[\begin{matrix} bf-ce & cd - af & ae-bd \end{matrix}\right]\left[\begin{matrix}g \\ h \\ i\end{matrix}\right].
\end{eqnarray*}
So we have
\[
\left[
\begin{matrix}
a \\ b\\ c
\end{matrix}
\right]
\times
\left[
\begin{matrix}
d \\ e \\ f
\end{matrix}
\right]
=
\left[
\begin{matrix}
bf-ce \\ cd-af \\ ae-bd
\end{matrix}
\right].
\]
\end{proof}

\noindent $\blacktriangleright$ 21. (page 100) Show that in a Euclidean space every pair of vectors satisfies
\[
|\!|u+v|\!|^2 + |\!|u-v|\!|^2 = 2|\!|u|\!|^2+2|\!|v|\!|^2.
\]
\begin{proof}
\begin{eqnarray*}
|\!|u+v|\!|^2 + |\!|u-v|\!|^2 &=& (u+v,u+v)+(u-v,u-v)=(u,u+v)+(v,u+v)+(u,u-v)-(v,u-v)\\
&=& (u,u)+(u,v)+(v,u)+(v,v)+(u,u)-(u,v)-(v,u)+(v,v) = 2|\!|u|\!|^2+2|\!|v|\!|^2.
\end{eqnarray*}
\end{proof}

\section{Spectral Theory of Self-Adjoint Mappings of a Euclidean Space into Itself}

The book's own solution gives answers to Ex 1, 4, 8, 10, 11, 12, 13.

\bigskip

$\bigstar$ {\bf Erratum}: On page 114, formula $(37)'$ should be $a_n = \max_{x\ne 0} \frac{(x,Hx)}{(x,x)}$ instead of $a_n = \min_{x\ne 0} \frac{(x,Hx)}{(x,x)}$.

\bigskip

$\bigstar$ {\bf Comments}:

\smallskip

1) In Theorem 4, the eigenvectors of $H$ can be complex (the proof did not show they are real), although the eigenvalues of $H$ are real.

\medskip

2) The following result will help us understand some details in the proof of Theorem $4'$ (page 108, ``It follows from this easily that we may choose an orthonormal basis consisting of real eigenvectors in each eigenspace $N_a$.")

\begin{prop}
Let $X$ be a conjugate invariant subspace of $\mathbb C^n$ (i.e. $X$ is invariant under conjugate operation). Then we can find a basis of $X$ consisting of real vectors.
\end{prop}
\begin{proof}
We work by induction. First, assume $\dim X=1$. $\forall v\in X$ with $v\ne 0$, we must have $\mbox{Re}v\in X$ and $\mbox{Im}v\in X$. At least one of them is non-zero and can be taken as a basis. Suppose for all conjugate invariant subspaces with dimension no more than $k$ the claim is true. Let $\dim X = k+1$. $\forall v\in X$ with $v \ne 0$. If $\mbox{Re}v$ and $\mbox{Im}v$ are (complex) linearly dependent, there must exist $c\in \mathbb C$ and $v_0\in \mathbb R^n$ such that $v = cv_0$, and we let $Y=\mbox{span}\{v_0\}$; if $\mbox{Re}v$ and $\mbox{Im}v$ are (complex) linearly independent, we let $Y= \mbox{span}\{v,\bar v\}=\mbox{span}\{\mbox{Re}v, \mbox{Im}v\}$. In either case, $Y$ is conjugate invariant. Let $Y^{\perp} = \{x\in X: \sum_{i=1}^nx_i\bar y_i = 0, \forall y = (y_1,\cdots, y_n)'\in Y\}$. Then clearly, $X=Y\bigoplus Y^{\perp}$ and $Y^{\perp}$ is also conjugate invariant. By assumption, we can choose a basis of $Y^{\perp}$ consisting exclusively of real vectors. Combined with the real basis of $Y$, we get a real basis of $X$.
\end{proof}


3) For an elementary proof of Theorem $4'$ by mathematical induction, see 丘维声\cite[page 297]{Qiu96a}, Theorem 5.9.4.

\medskip

4) Theorem 5 (the spectral resolution representation of self-adjoint operators)
can be extended to infinite dimensional space and is phrased as ``any self-adjoint operator can be decomposed as the integral w.r.t. orthogonal projections''. See any textbook on functional analysis for details.

\medskip

5) For the second proof of Theorem 4, compare Spivak\cite[page 122]{Spivak65}, Exercise 5-17 and Keener\cite[page 15]{Keener00}, Theorem 1.6 (the ``maximum principle").

\bigskip

$\bigstar$ {\bf Supplementary notes}

In view of the spectral theorem (Theorem 7 of Chapter 6, p.70), the diagonalization of a self-adjoint matrix $A$ is reduced to showing that in the decomposition
\[
\mathbb C^n = N_{d_1}(\alpha_1) \bigoplus \cdots \bigoplus N_{d_k}(\alpha_k),
\]
we must have $d_i=1$, $i=1, \cdots, k$. Indeed, assume for some $\alpha$, $d(\alpha)>1$. Then for any $x \in N_2(\alpha)\setminus
N_1(\alpha)$, we have
\[
(\alpha I - A) x \ne 0, (\alpha I - A)^2 x = 0.
\]
But the second equation implies
\[
((\alpha I - A) x, (\alpha I - A) x) = ((\alpha I - A)^2 x, x) = 0.
\]
A contradiction. So we must have $d(\alpha)=1$. This is the substance of the proof of Theorem 4, part (b).


\bigskip

\noindent $\blacktriangleright$ 1. (page 102) Show that
\[
Re (x, Mx) = (x, M_s x).
\]
\begin{proof}
\[
\left(x, \frac{M+M^*}{2}x\right)=\frac{1}{2}[(x,Mx)+(x,M^*x)]=\frac{1}{2}[(x,Mx)+(Mx,x)] = \frac{1}{2}[(x,Mx)+\overline{(x,Mx)}]=\mbox{Re}(x,Mx).
\]
\end{proof}

\noindent $\blacktriangleright$ 2. (page 104) We have described above an algorithm for diagonalizing $q$; implement it as a computer program.
\begin{solution} Skipped for this version.
\end{solution}

\noindent $\blacktriangleright$ 3. (page 105) Prove that
\[
p_+ + p_0 = \max \dim S, \; \mbox{$q\ge 0$ on $S$}
\]
and
\[
p_- + p_0 = \max \dim S, \; \mbox{$q\le 0$ on $S$.}
\]
\begin{proof}We prove $p_++p_0=\max_{q(S)\ge 0}\mbox{dim$S$}$. $p_- + p_0=\max_{q(S)\le 0}\mbox{dim$S$}$ can be proved similarly. We shall use representation (11) for $q$ in terms of the coordinates $z_1$, $\cdots$, $z_n$; suppose we label them so that $d_1$, $\cdots$, $d_p$ are nonnegative where $p=p_++p_0$, and the rest are negative. Define the subspace $S_1$ to consist of all vectors for which $z_{p+1}=\cdots=z_n=0$. Clearly, $\dim S_1=p$ and $q$ is nonnegative on $S_1$. This shows $p_++p_0=p\le \max_{q(S)\ge 0}\dim S$. If $<$ holds, there must exist a subspace $S_2$ such that $q(S_2)\ge 0$ and $\dim S_2>p=p_++p_0$. Define $P: S_2\to S_3:=\{z:z_{p+1}=z_{p+2}=\cdots=z_n=0\}$ by $P(z)=(z_1,\cdots,z_p,0,\cdots,0)$. Since $\dim S_2 > p =\dim S_3$, there exists some $z\in S_2$ such that $z\ne 0$ and $P(z)=0$. This implies $z_1=\cdots=z_p=0$. So $q(z) = \sum_{i=1}^pd_iz_i^2 + \sum_{i=p+1}^nd_iz_i^2 = \sum_{i=p+1}^nd_iz_i^2 <0$, contradiction. Therefore, our assumption is not true and $<$ cannot hold.  \end{proof}

\noindent$\blacktriangleright$ 4. (page 109) Show that the columns of $M$ are the eigenvectors of $H$.
\begin{proof} Write $M$ in the column form $M=[c_1,\cdots,c_n]$ and multiply both sides of formula (24)' by $M$; we get
\[
HM=[Hc_1,\cdots, Hc_n] = MD = [c_1,\cdots,c_n]\left[\begin{matrix}\lambda_1 & 0 & \cdots & 0 \\ 0 & \lambda_2 & \cdots & 0 \\ \cdots & \cdots & \cdots & \cdots \\ 0 & 0 & \cdots & \lambda_n\end{matrix}\right] = [\lambda_1c_1,\cdots, \lambda_nc_n],
\]
where $\lambda_1$, $\cdots$, $\lambda_n$ are eigenvalues of $H$, including multiplicity. So we have $Hc_i=\lambda_ic_i$, $i=1,\cdots,n$. This shows the columns of $M$ are eigenvectors of $H$.
\end{proof}

\noindent $\blacktriangleright$ 5. (page 118) (a) Show that the minimum problem (47) has a nonzero solution $f$.

(b) Show that a solution $f$ of the minimum problem (47) satisfies the equation
\[
Hf = bMf,
\]
where the scalar $b$ is the value of the minimum (47).

(c) Show that the constrained minimum problem
\[
\min_{(y, Mf)=0} \frac{(y, Hy)}{(y, My)}
\]
has a nonzero solution $g$.

(d) Show that a solution $g$ of the minimum problem $(47)'$ satisfies the equation
\[
Hg = cMg,
\]
where the scalar $c$ is the value of the minimum $(47)'$.
\begin{proof}
The essence of the generalization can be summarized as follows: $\langle x, y \rangle = (x, My)$ is an inner product and $M^{-1}H$ is self-adjoint under this new inner product, hence all the previous results apply.

Indeed, $\langle x, y\rangle$ is a bilinear function of $x$ and $y$; it is symmetric since $M$ is self-adjoint; and it is positive since $M$ is positive. Combined, we can conclude $\langle x, y \rangle$ is an inner product.

Because $M$ is positive, $Mx=0$ has a unique solution $0$. So $M^{-1}$ exists. Define $U =M^{-1}H$ and we check $U$ is self-adjoint under the new inner product $\langle\cdot,\cdot\rangle$. Indeed, $\forall x, y \in X$,
\[
\langle Ux, y \rangle = (Ux, My) = (M^{-1}Hx, My) = (Hx, y) = (x,Hy) = (x, MM^{-1}Hy) = (x, MUy) = \langle x, Uy\rangle.
\]
Applying the second proof of Theorem 4, with $(\cdot, \cdot)$ replaced by $\langle \cdot, \cdot \rangle$ and $H$ replaced by $M^{-1}H$, we can verify claims (a)-(d) are true.
\end{proof}

\noindent $\blacktriangleright$ 6.  (page 118) Prove Theorem 11.
\begin{proof} This is just Theorem 4 with $(\cdot, \cdot)$ replaced by $\langle \cdot, \cdot \rangle$ and $H$ replaced by $M^{-1}H$, where $\langle \cdot, \cdot \rangle$ is defined in the solution of Exercise 5. \end{proof}

\noindent $\blacktriangleright$ 7.  (page 118) Characterize the numbers $b_i$ in Theorem 11 by a minimax principle similar to (40).
\begin{solution} This is just Theorem 11 with $(\cdot, \cdot)$ replaced by $\langle \cdot, \cdot \rangle$ and $H$ replaced by $M^{-1}H$, where $\langle \cdot, \cdot \rangle$ is defined in the solution of Exercise 5. \end{solution}

\noindent $\blacktriangleright$ 8.  (page 119)  Prove Theorem $11'$.
\begin{proof} Under the new inner product $\langle \cdot, \cdot \rangle = ( \cdot, M\cdot )$, $U=M^{-1}H$ is selfadjoint. By Theorem 4, all the eigenvalues of $M^{-1}H$ are real. If $H$ is positive and $M^{-1}Hx = \lambda x$, then $\lambda \langle x,x\rangle = \langle x, M^{-1}H x\rangle = (x,Hx)>0$ for $x\ne 0$, which implies $\lambda >0$. So under the condition that $H$ is positive, all eigenvalues of $M^{-1}H$ are positive. \end{proof}

\noindent $\blacktriangleright$ 9.  (page 119)  Give an example to show that Theorem 11' is false if $M$ is not positive.
\begin{solution}
Skipped for this version.
\end{solution}

\noindent $\blacktriangleright$ 10.  (page 119)  Prove Theorem 12. ({\it Hint}: Use Theorem 8.)
\begin{proof}By Theorem 8, we can assume $N$ has an orthonormal basis $v_1,\cdots, v_n$ consisting of genuine eigenvectors. We assume the eigenvalue corresponding to $v_j$ is $n_j$. Then by letting $x=v_j$, $j=1,\cdots, n$ and by the definition of $|\!|N|\!|$, we can conclude $|\!|N|\!| \ge \max|n_j|$. Meanwhile, $\forall x \in X$ with $|\!|x|\!| =1$, there exist $a_1,\cdots,a_n \in \mathbb C$, so that $\sum|a_j|^2=1$ and $x=\sum a_jv_j$. So
\[
\frac{|\!|Nx|\!|}{|\!|x|\!|}=|\!|\sum a_jn_jv_j|\!| = \sqrt{\sum |a_jn_j|^2} \le \max_{1\le j\le n}|n_j|\sqrt{\sum |a_j|^2} = \max|n_j|.
\]
This implies $|\!|N|\!|\le\max|n_j|$. Combined, we can conclude $|\!|N|\!| = \max|n_j|$.\end{proof}
\begin{remark}
Compare the above result with formula (48) and Theorem 18 of Chapter 7.
\end{remark}

\noindent $\blacktriangleright$ 11.  (page 119)  We define the cyclic shift mapping $S$, acting on vectors in $\mathbb C^n$, by $S(a_1, a_2, \cdots, a_n) = (a_n, a_1, \cdots, a_{n-1})$.

(a) Prove that $S$ is an isometry in the Euclidean norm.

(b) Determine the eigenvalues and eigenvectors of $S$.

(c) Verify that the eigenvectors are orthogonal.

\begin{proof}$|S(a_1,\cdots, a_n)|=|(a_n, a_1, \cdots, a_{n-1})| = |(a_1,\cdots, a_n)|$. So $S$ is an isometry in the Euclidean norm. To determine the eigenvalues and eigenvectors of $S$, note under the canonical basis $e_1,\cdots, e_n$, $S$ corresponds to the matrix
\[
A =\left(
\begin{matrix}
0 & 0 & 0 & \cdots & 0 & 1 \\
1 & 0 & 0 & \cdots & 0 & 0 \\
0 & 1 & 0 & \cdots & 0 & 0 \\
\cdots & \cdots & \cdots & \cdots & \cdots & \cdots \\
0 & 0 & 0 & \cdots & 1 & 0
\end{matrix}
\right),
\]
whose characteristic polynomial is $p(s) = |A-sI| = (-s)^n + (-1)^{n+1}$. So the eigenvalues of $S$ are the solutions to the equation $s^n=1$, i.e. $\lambda_k = e^{\frac{2\pi k}{n}i}$, $k=1,\cdots,n$. Solve the equation $Sx_k=\lambda_kx_k$, we can obtain the general solution as $x_k =(\lambda_k^{n-1},\lambda_k^{n-2},\cdots,\lambda_k,1)'$. After normalization, we have $x_k = \frac{1}{\sqrt{n}}(\lambda_k^{n-1},\lambda_k^{n-2},\cdots,\lambda_k,1)'$. Therefore, for $i\ne j$,
\[
(x_i,x_j)=\frac{1}{n}\sum_{k=1}^n\lambda_i^{k-1}\bar\lambda_j^{k-1}=\frac{1}{n}\sum_{k=1}^n(\lambda_i\bar\lambda_j)^{k-1}=\frac{1}{n}\frac{1-(\lambda_i\bar\lambda_j)^n}{1-\lambda_i\bar\lambda_j}=0.
\]
 \end{proof}

\noindent $\blacktriangleright$ 12.  (page 120)  (i) What is the norm of the matrix
\[
A = \left(
      \begin{array}{cc}
        1 & 2 \\
        0 & 3 \\
      \end{array}
    \right)
\]
in the standard Euclidean structure?

(ii) Compare the value of $|\!| A |\!|$ with the upper and lower bounds of $|\!| A |\!|$ asked for in Exercise 19 of Chapter 7.

\smallskip

(i)
\begin{solution}
$A^*A=\left[\begin{matrix}1 & 0 \\ 2 & 3 \end{matrix}\right]\left[\begin{matrix}1 & 2 \\ 0 & 3\end{matrix}\right]=\left[\begin{matrix}1 & 2 \\ 2 & 13\end{matrix}\right]$, which has eigenvalues $7\pm \sqrt{40}$. By Theorem 13, $|\!|A|\!|=\sqrt{7+\sqrt{40}}\approx 3.65$.
\end{solution}

(ii) \begin{solution} This is consistent with the estimate obtained in Exercise 19 of Chapter 7: $3\le |\!|A|\!| \le 3.7417$. \end{solution}

\noindent $\blacktriangleright$ 13. (page 120)  What is the norm of the matrix
\[
\left(
  \begin{array}{ccc}
    1 & 0 & -1 \\
    2 & 3 & 0 \\
  \end{array}
\right)
\]
in the standard Euclidean structures of $\mathbb R^2$ and $\mathbb R^3$.
\begin{solution} $\left[\begin{matrix}1 & 2 \\ 0 & 3 \\ -1 & 0 \end{matrix}\right]\left[\begin{matrix}1 & 0 & -1 \\ 2 & 3 & 0 \end{matrix}\right]= \left[\begin{matrix}5 & 6 & -1 \\6 & 9 & 0 \\ -1 & 0 & 1\end{matrix}\right]$, which has eigenvalues 0, 1.6477, and
13.3523. By Theorem 13, the norm of the matrix is approximately $\sqrt{13.3523}\approx 3.65$. \end{solution}

\section{Calculus of Vector- and Matrix- Valued Functions}

The book's own solution gives answers to Ex 2, 3, 6, 7.

\bigskip

$\bigstar$ {\bf Erratum}: In Exercise 6 (p.129), we should have $\det e^A = e^{tr A}$ instead of $\det e^A = eA$.

\bigskip

$\bigstar$ {\bf Comments}: In the proof of Theorem 11, to see why $sI - A^d = \prod_0^{d-1} ()$ holds, see Lemma 3 of Appendix 6, formula (9) (p.369).

\bigskip

\noindent  $\blacktriangleright$ 1. (page 122) Prove the fundamental lemma for vector valued functions. ({\it Hint}: Show that for every vector $y$, $(x(t),y)$ is a constant.)
\begin{proof}Following the hint, note $\frac{d}{dt}(x(t),y)=(\dot{x}(t),y)+(x(t),\dot{y})=0$. So $(x(t),y)$ is a constant by fundamental lemma for scalar valued functions. Therefore $(x(t)-x(0),y)=0$, $\forall y \in \mathbb K^n$. This implies $x(t)\equiv x(0)$.
\end{proof}

\noindent  $\blacktriangleright$ 2. (page 124) Derive formula (3) using product rule (iii).
\begin{proof}$A^{-1}(t)A(t)=I$. So $0 = \frac{d}{dt}\left[A^{-1}(t)A(t)\right]=\frac{d}{dt}A^{-1}(t)\cdot A(t) + A^{-1}(t) \dot{A}(t)$ and $\frac{d}{dt}A^{-1}(t) =\frac{d}{dt}A^{-1}(t)\cdot A(t)A^{-1}(t) =-A^{-1}(t)\dot{A}(t)A^{-1}(t)$.\end{proof}

\noindent  $\blacktriangleright$ 3. (page 128) Calculate
\[
e^{A+B} = \exp \left(\begin{array}{cc}
                 0 & 1 \\
                 1 & 0
               \end{array}\right).
\]
\begin{solution}$\left(\begin{matrix}0 & 1 \\ 1 & 0\end{matrix}\right)^2=\left(\begin{matrix}1 & 0 \\ 0 & 1\end{matrix}\right)$. So \begin{eqnarray*}
\left(\begin{matrix}0 & 1 \\ 1 & 0\end{matrix}\right)^n=
\begin{cases}
I_{2\times 2} & \mbox{if $n$ is even}\\
\left(\begin{matrix}0 & 1 \\ 1 & 0\end{matrix}\right) & \mbox{if $n$ is odd}.
\end{cases}
\end{eqnarray*}
Therefore, we have
\begin{eqnarray*}
\exp\{A+B\}
&=& \sum_{n=0}^{\infty}\frac{1}{n!}\left(\begin{matrix}0 & 1 \\ 1 & 0\end{matrix}\right)^n = \sum_{k=0}^{\infty}\frac{I_{2\times 2}}{(2k)!} + \sum_{k=0}^{\infty}\frac{1}{(2k+1)!}\left(\begin{matrix}0 & 1 \\ 1 & 0\end{matrix}\right)
= \frac{e+e^{-1}}{2}I_{2\times 2} + \frac{e-e^{-1}}{2} \left(\begin{matrix}0 & 1 \\ 1 & 0\end{matrix}\right).
\end{eqnarray*}
\end{solution}

\noindent $\blacktriangleright$ 4. (page 129) Prove the proposition stated in the Conclusion.
\begin{proof}For any $\varepsilon >0$, there exists $M>0$, so that $\forall m\ge M$, $\sup_t|\!|\dot{E}_m(t)-F(t)|\!|<\varepsilon$. So $\forall m\ge M$, $\forall t, h$,
\begin{eqnarray*}
& & \left|\!\left| \frac{1}{h}[E_m(t+h)-E_m(t)]-F(t)\right|\!\right| \\
&=& \left|\!\left|\frac{1}{h}\int_t^{t+h}[\dot{E}_m(s)-F(s)]ds +\frac{1}{h}\int_t^{t+h}F(s)ds  - F(t)\right|\!\right| \\
&\le& \frac{\int_t^{t+h}|\!| \dot{E}_m(s)-F(s)|\!|ds}{h}+\left|\!\left| \frac{1}{h}\int_t^{t+h}F(s)ds-F(t)\right|\!\right|\\
 &<&\varepsilon + \left|\!\left|\frac{1}{h}\int_t^{t+h}F(s)ds-F(t) \right|\!\right|.
\end{eqnarray*}
Under the assumption that $F$ is continuous, we have
\begin{eqnarray*}
\lim_{h\to 0}\left|\!\left|\frac{1}{h}[E(t+h)-E(t)]-F(t) \right|\!\right| &=& \lim_{h\to 0}\lim_{m\to\infty}\left|\!\left|\frac{1}{h}[E_m(t+h)-E_m(t)] - F(t) \right|\!\right| \\
&\le& \varepsilon + \lim_{h\to 0}\left|\!\left|\frac{1}{h}\int_t^{t+h}F(s)ds-F(t) \right|\!\right|=\varepsilon.
\end{eqnarray*}
Since $\varepsilon$ is arbitrary, we must have $\lim_{h\to 0}\frac{1}{h}[E(t+h)-E(t)]=F(t)$.
\end{proof}

\noindent $\blacktriangleright$ 5. (page 129) Carry out the details of the argument that $\dot{E}_m(t)$ converges.
\begin{proof}By formula (12), $\dot{E}_m(t) = \sum_{k=1}^m\sum_{i=0}^{k-1}\frac{1}{k!}A^i(t)\dot{A}(t)A^{k-i-1}(t)$. So for $m$ and $n$ with $m<n$,
\begin{eqnarray*}
|\!|\dot{E}_m(t)-\dot{E}_n(t) |\!| &\le& \sum_{k=m+1}^n \sum_{i=0}^{k-1} \frac{|\!| A^i(t) \dot{A}(t) A^{k-i-1}(t)|\!|}{k!}
=\sum_{k=m+1}^n\sum_{i=0}^{k-1} \frac{|\!|A(t)|\!|^{k-1}|\!|\dot{A}(t) |\!|}{k!}\\
 &=& \sum_{k=m+1}^n\frac{|\!|A(t) |\!|^{k-1}}{(k-1)!}|\!| \dot{A}(t)|\!|
 = |\!|\dot{A}(t)|\!| [e_n(|\!|A(t)|\!|)-e_m( |\!|A(t)|\!|)]\to 0
\end{eqnarray*}
as $m,n\to \infty$. This shows $(\dot{E}_m(t))_{m=1}^{\infty}$ is a Cauchy sequence, hence convergent.
\end{proof}

\noindent $\blacktriangleright$ 6. (page 129) Apply formula (10) to $Y(t) = e^{At}$ and show that
\[
\det e^A = e^{tr A}.
\]
\begin{proof}Apply formula (10) to $Y(t)=e^{At}$, we have $\frac{d}{dt}\log\det Y(t) = \mbox{tr}(e^{-At}e^{At}A)=\mbox{tr}A$. Integrating from 0 to $t$, we get $\log\det Y(t) - \log\det Y(0) = t \mbox{tr}A$. So $\det Y(t) = e^{t\mbox{tr}A}$. In particular, $\det e^A = e^{\mbox{tr}A}$. \end{proof}

\noindent $\blacktriangleright$ 7. (page 129) Prove that all eigenvalues of $e^A$ are of the form $e^a$, $a$ an eigenvalue of $A$. {\it Hint}: Use Theorem 4 of Chapter 6, along with Theorem 6 below.
\begin{proof}Without loss of generality, we can assume $A$ is a Jordan matrix. Then $e^A$ is an upper triangular matrix and its entries on the diagonal line have the form $e^a$, where $a$ is an eigenvalue of $A$. So all eigenvalues of $e^A$ are the exponentials of eigenvalues of $A$.\end{proof}

\noindent $\blacktriangleright$ 8. (page 142) (a) Show that the set of all complex, self-adjoint $n\times n$ matrices forms $N=n^2$-dimensional linear space over the reals.

(b) Show that the set of complex, self-adjoint $n\times n$ matrices that have one double and $n-2$ simple eigenvalues can be described in terms of $N-3$ real parameters.

(a) \begin{proof}The total number of ``free'' entries is $\frac{n(n+1)}{2}$. The entries on the diagonal line must be real. So the dimension is $\frac{n(n+1)}{2}\times 2 - n = n^2$.
\end{proof}

(b)\begin{proof} Similar to the argument in the text, the total number of complex parameters that determine the eigenvectors is $(n-1)+\cdots+2=\frac{n(n-1)}{2}-1$. This is equivalent to $n(n-1)-2$ real parameters. The number of distinct (real) eigenvalues is $n-1$. So the dimension = $n^2-n-2+n-1=n^2-3$.
\end{proof}

\noindent $\blacktriangleright$ 9. (page 142) Choose in (41) at random two self-adjoint $10\times 10$ matrices $M$ and $B$. Using available software (MATLAB, MAPLE, etc.) calculate and graph at suitable intervals the 10 eigenvalues of $B+tM$ as functions of $t$ over some $t$-segment.
\begin{solution}See the Matlab/Octave program {\bf aoc.m} below.

\begin{verbatim}
function aoc

%AOC illustrates the avoidance-of-crossing phenomenon
%    of the neighboring eigenvalues of a continuous
%    symmetric matrix. This is Exercise 9, Chapter 9
%    of the textbook, Linear Algebra and Its Applications,
%    2nd Edition, by Peter Lax.
%    For each t on a discretized interval the eigenvalues of
%    B + t*M are computed; the resulting eigenvalue curves are
%    then overlaid one at a time (press ENTER to advance) so the
%    avoidance of crossing between neighboring curves is visible.

% Initialize global variables
matrixSize = 10;
lowerBound = 0.01; %lower bound of t's range
upperBound = 3; %upper bound of t's range
stepSize = 0.1;
t = lowerBound:stepSize:upperBound;

% Generate two random symmetric matrices: rand() gives a
% uniformly random matrix; adding its own transpose makes
% the result symmetric (real self-adjoint), as required.
temp1 = rand(matrixSize);
temp2 = rand(matrixSize);
M = temp1+temp1';
B = temp2+temp2';

% Initialize eigenvalue matrix to zeros;
% use each column to store eigenvalues for
% a given parameter value t(i). For symmetric input eig
% returns the eigenvalues in ascending order, so row j
% traces the j-th smallest eigenvalue as a function of t.
eigenval = zeros(matrixSize,numel(t));
for i = 1:numel(t)
    eigenval(:,i) = eig(B+t(i)*M);
end

% Plot eigenvalue curves one by one, holding earlier curves
% on screen; pause waits for a keypress between curves.
hold off;
disp(['There are ', num2str(matrixSize), ' eigenvalue curves.']);
disp(' ');
for j = 1:matrixSize
    disp(['Eigenvalue curve No. ', num2str(j),'. Press ENTER to continue...']);
    plot(t, eigenval(j,:));
    xlabel('t');
    ylabel('eigenvalues');
    title('Matlab illustration of Avoidance of Crossing');
    hold on;
    pause;
end
hold off;
\end{verbatim}
\end{solution}

\section{Matrix Inequalities}

The book's own solution gives answers to Ex 1, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15.

\bigskip

$\bigstar$ {\bf Erratum}: In Exercise 6 (p.152), $\text{Im}x >0$ should be $\text{Im}z > 0$.

\bigskip

\noindent  $\blacktriangleright$ 1. (page 146) How many square roots are there of a positive mapping?
\begin{solution}Suppose $H$ has $k$ distinct eigenvalues $\lambda_1$, $\cdots$, $\lambda_k$. Denote by $X_j$ the subspace consisting of eigenvectors of $H$ pertaining to the eigenvalue $\lambda_j$. Then $H$ can be represented as $H=\sum_{j=1}^k\lambda_jP_j$ where $P_j$ is the projection to $X_j$. Let $A$ be a positive square root of $H$, we claim $A$ has to be $\sum_{j=1}^k\sqrt{\lambda_j}P_j$. Indeed, if $\alpha$ is an eigenvalue of $A$ and $x$ is an eigenvector of $A$ pertaining to $\alpha$, then $\alpha^2$ is an eigenvalue of $H$ and $x$ is an eigenvector of $H$ pertaining to $\alpha^2$. So we can assume $A$ has $m$ distinct eigenvalues $\alpha_1$, $\cdots$, $\alpha_m$ $(m\le k)$ with $\alpha_i=\sqrt{\lambda_i}$ $(1\le i\le m)$. Denote by $Y_i$ the subspace consisting of eigenvectors of $A$ pertaining to $\alpha_i$. Then $Y_i\subset X_i$. Since $H=\bigoplus_{i=1}^mY_i=\bigoplus_{j=1}^kX_j$, we must have $m=k$ and $Y_i=X_i$, otherwise at least one of the $\le$ in the sequence of inequalities $\dim H=\sum_{i=1}^m\dim Y_i\le \sum_{i=1}^m\dim X_i\le \sum_{i=1}^k\dim X_i=\dim H$ is $<$, contradiction. So $A$ can be uniquely represented as $A = \sum_{j=1}^k\sqrt{\lambda_j}P_j$, the same as $\sqrt{H}$ defined in formula (6).   \end{solution}

\noindent  $\blacktriangleright$ 2. (page 146) Formulate and prove properties of nonnegative mappings similar to parts (i), (ii), (iii), (iv), and (vi) of Theorem 1.
\begin{prop}
(i) The identity $I$ is nonnegative.
(ii) If $M$ and $N$ are nonnegative, so is their sum $M+N$, as well as $aM$ for any nonnegative number $a$.
(iii) If $H$ is nonnegative and $Q$ is invertible, we have $Q^*HQ\ge 0$.
(iv) $H$ is nonnegative if and only if all its eigenvalues are nonnegative.
(vi) Every nonnegative mapping has a nonnegative square root, uniquely determined.
\end{prop}

\begin{proof}
(i) and (ii) are obvious. For part (iii), we write the quadratic form associated with $Q^*HQ$ as
\[
(x,Q^*HQx)=(Qx,HQx)=(y,Hy)\ge 0,
\]
where $y=Qx$. For part (iv), by the selfadjointness of $H$, there exists an orthogonal basis of eigenvectors. Denote these by $h_j$ and the corresponding eigenvalue by $a_j$. Then any vector $x$ can be expressed as a linear combination of the $h_j$'s: $x=\sum_j x_jh_j$. So $(x,Hx)=\sum_{i,j}(x_ih_i,x_ja_jh_j)=\sum_{j=1}^na_j|x_j|^2$. From the formula it is clear that $(x,Hx)\ge 0$ for any $x$ if and only if $a_j\ge 0$, $\forall j$. For part (vi), the proof is similar to that of positive mappings and we omit the lengthy proof. Cf. also solution to Exercise 10.1.
\end{proof}

\noindent  $\blacktriangleright$ 3. (page 149) Construct two real, positive $2\times 2$ matrices whose symmetrized product is {\it not} positive.
\begin{solution}Let $A$ be a mapping that maps the vector $(0,1)'$ to $(0,\alpha_2)'$ with $\alpha_2>0$ sufficiently small and $(1,0)'$ to $(\alpha_1,0)'$ with $\alpha_1>0$ sufficiently large. Let $B$ be a mapping that maps the vector $(1,1)'$ to $(\lambda_1,\lambda_1)'$ with $\lambda_1>0$ sufficiently small and $(-1,1)'$ to $(-\lambda_2, \lambda_2)'$ with $\lambda_2>0$ sufficiently large. Then both $A$ and $B$ are positive mappings, and we can find $x$ between $(1,1)'$ and $(0,1)'$ so that $(Ax,Bx)<0$. By the analysis in the paragraph below formula $(14)'$, $AB+BA$ is not positive. More precisely, we have $A = \left(\begin{matrix}\alpha_1 & 0 \\ 0 & \alpha_2\end{matrix}\right)$ and $B =\frac{1}{2}\left(\begin{matrix}\lambda_1+\lambda_2 & \lambda_1-\lambda_2 \\ \lambda_1-\lambda_2 & \lambda_1+\lambda_2\end{matrix}\right)$.
\end{solution}

\noindent  $\blacktriangleright$ 4. (page 151) Show that if $0<M<N$, then (a) $M^{1/4} < N^{1/4}$. (b) $M^{1/m} < N^{1/m}$, $m$ a power of 2. (c) $\log M \le \log N$.
\begin{proof}By Theorem 5 and induction, it is easy to prove (a) and (b). For (c), we follow the hint. If $M$ has the spectral resolution $M=\sum_{i=1}^k\lambda_iP_i$, $\log M$ is defined as
\[
\log M = \sum_{i=1}^k\log\lambda_i P_i = \sum_{i=1}^k\lim_{m\to\infty}m(\lambda_i^{\frac{1}{m}}-1)P_i=\lim_{m\to\infty}m\left(\sum_{i=1}^k\lambda_i^{\frac{1}{m}}P_i-\sum_{i=1}^kP_i\right)=\lim_{m\to\infty}m(M^{\frac{1}{m}}-I).
 \]
So $\log M = \lim_{m\to\infty}m(M^{\frac{1}{m}}-I) \le \lim_{m\to\infty}m(N^{\frac{1}{m}}-I)=\log N$.\end{proof}

\noindent  $\blacktriangleright$ 5. (page 151) Construct a pair of mappings $0 < M < N$ such that $M^2$ is {\it not} less than $N^2$. ({\it Hint}: Use Exercise 3).
\begin{solution} (from the textbook's solution, pp. 291) Choose $A$ and $B$ as in Exercise 3, that is positive matrices whose symmetrized product is not positive. Set
\[
M=A, N = A+tB,
\]
$t$ sufficiently small positive number. Clearly, $M<N$.
\[
N^2=A^2+t(AB+BA)+t^2B^2;
\]
for $t$ small the term $t^2B^2$ is negligible compared with the linear term. Therefore for $t$ small $N^2$ is not greater than $M^2$.
\end{solution}

\noindent  $\blacktriangleright$ 6. (page 151) Verify that (19) defines $f(z)$ for a complex argument $z$ as an analytic function, as well as that $\text{Im} f(z)>0$ for $\text{Im} z > 0$.
\begin{proof} For $f(z)=az+b-\int_0^{\infty}\frac{dm(t)}{z+t}$, we have
\[
f(z+\Delta z)-f(z)=a\Delta z + \Delta z \cdot \int_0^{\infty}\frac{dm(t)}{(z+\Delta z+t)(z+t)}.
\]
So if we can show $\lim_{\Delta z\to 0}\int_0^{\infty}\frac{dm(t)}{(z+\Delta z+t)(z+t)}$ exists and is finite, $f(z)$ is analytic by definition. Indeed, if $\mbox{Im} z > 0$, for $\Delta z$ sufficiently small, we have
\[
\left|\frac{1}{z+\Delta z+t}\right|  \le \frac{1}{|z+t|-|\Delta z|} \le \frac{1}{\mbox{Im}z - |\Delta z|} \le \frac{2}{\mbox{Im}z}.
\]
So by Dominated Convergence Theorem, $\lim_{\Delta z\to 0}\int_0^{\infty}\frac{dm(t)}{(z+\Delta z+t)(z+t)}$ exists and is equal to $\int_0^{\infty}\frac{dm(t)}{(z+t)^2}$, which is finite. To see $\mbox{Im}f(z)>0$ for $\mbox{Im}z > 0$, we note
\[
\mbox{Im}f(z) = a\mbox{Im}z - \mbox{Im}\int_0^{\infty}\frac{dm(t)}{\mbox{Re}z+t +i\mbox{Im}z} = \mbox{Im}z \left[ a+ \int_0^{\infty}\frac{ dm(t)}{(\mbox{Re}z+t)^2 +(\mbox{Im}z)^2} \right].
\]
\end{proof}
\begin{remark}
This exercise can be used to verify formula (19) on p.151.
\end{remark}

\noindent  $\blacktriangleright$ 7. (page 153) Given $m$ positive numbers $r_1$, $\cdots$, $r_m$, show that the matrix
\[
G_{ij} = \frac{1}{r_i + r_j + 1}
\]
is positive.
\begin{proof} Consider the Euclidean space $L^2[0,1]$, with the inner product $(f,g):=\int_0^1f(t)g(t)dt$. Choose $f_j(t) =t^{r_j}$, $j=1,\cdots, m$; then the associated Gram matrix is
\[
G_{ij} = (f_i,f_j)=\int_0^1t^{r_i+r_j}dt=\frac{1}{r_i+r_j+1}.
\]
Clearly, $(f_j)_{j=1}^m$ are linearly independent. So $G$ is positive.
\end{proof}

\noindent  $\blacktriangleright$ 8. (page 158) Look up a proof of the calculus result (35).
\begin{proof}
We apply the change of variable formula as follows
\[
\int_{-\infty}^{\infty}e^{-z^2}dz = \sqrt{\int_{\mathbb R^2}e^{-x^2-y^2}dxdy}= \sqrt{\int_0^{2\pi}d\theta\int_0^{\infty}e^{-r^2}rdr} = \sqrt{2\pi \cdot \frac{1}{2}} = \sqrt{\pi}.
\]
\end{proof}

\noindent  $\blacktriangleright$ 9. (page 162) Extend Theorem 14 to the case when $\dim V = \dim U - m$, where $m$ is greater than 1.
\begin{proof}
The extension is straightforward, just replace the paragraph (on page 161) ``If $S$ is a subspace of $V$, then $T=S$ and $\dim T = \dim S$. ... It follows that
\[
\dim S - 1 \le \dim T
\]
as asserted." with the following one: Let $T = S\cap V$, and denote by $V^{\perp}$ the orthogonal complement of $V$ in $U$, so that $\dim V^{\perp}=n-(n-m)=m$. By the dimension formula, $\dim T = \dim S + \dim V - \dim(S+V) \ge \dim S + (n-m) - n$, i.e. $\dim T \ge \dim S - m$.

The rest of the proof is the same as the proof of Theorem 14 and we can conclude that \[ p_{\pm}(A)-m \le p_{\pm}(B) \le p_{\pm}(A).\]
\end{proof}

\noindent  $\blacktriangleright$ 10. (page 164) Prove inequality $(44)'$.
\begin{proof}For any $x$, $(x, (N-M-dI)x)= (x,(N-M)x)-d|\!|x|\!|^2 \le |\!|N-M|\!||\!|x|\!|^2-d|\!|x|\!|^2=0$. Similarly, $(x,(M-N-dI)x)=(x,(M-N)x)-d|\!|x|\!|^2\le |\!|M-N|\!||\!|x|\!|^2-d|\!|x|\!|^2\le 0$.\end{proof}

\noindent  $\blacktriangleright$ 11. (page 166) Show that (51) is largest when $n_i$ and $m_j$ are arranged in the same order.
\begin{proof} It's easy to see the problem can be reduced to the case $k=2$. To prove this case, we note if $m_1\le m_2$ and $n_{p_1}\ge n_{p_2}$, we have
\[
m_2n_{p_1}+m_1n_{p_2}-m_2n_{p_2}-m_1n_{p_1} = (m_2-m_1)(n_{p_1}-n_{p_2})\ge 0.
\]
\end{proof}

\noindent  $\blacktriangleright$ 12. (page 168) Prove that if the self-adjoint part of $Z$ is positive, then $Z$ is invertible, and the self-adjoint part of $Z^{-1}$ is positive.
\begin{proof}Assume $Z$ is not invertible. Then there exists $x\ne 0$ such that $Zx=0$. In particular, this implies $(x,Zx)=(x,Z^*x)=0$. Summing up these two, we get $(x,(Z+Z^*)x)=0$, which contradicts the assumption that the selfadjoint part of $Z$ is positive. So $Z$ is invertible.
For any $x\ne 0$, there exists $y \ne 0$ so that $x=Zy$. So
\begin{eqnarray*}
(x, (Z^{-1}+(Z^{-1})^*)x)
&=& (x,Z^{-1}x)+(x,(Z^{-1})^*x) \\
&=& (Zy,y)+(Z^{-1}x,x) \\
&=& (y,Z^*y)+(y,Zy) \\
&=& (y, (Z+Z^*)y)>0.
\end{eqnarray*}
This shows the selfadjoint part of $Z^{-1}$ is positive. \end{proof}

\noindent  $\blacktriangleright$ 13. (page 170) Let $A$ be any mapping of a Euclidean space into itself. Show that $AA^*$ and $A^*A$ have the same eigenvalues with the same multiplicity.
\begin{proof} Exercise Problem 14 has proved the claim for non-zero eigenvalues. Since the dimensions of the spaces of generalized eigenvectors of $AA^*$ and $A^*A$ are both equal to the dimension of the underlying Euclidean space, we conclude by Spectral Theorem that their zero eigenvalues must have the same multiplicity.\end{proof}

\noindent  $\blacktriangleright$ 14. (page 171) Let $A$ be a mapping of a Euclidean space into another Euclidean space. Show that $AA^*$ and $A^*A$ have the same {\it nonzero} eigenvalues with the same multiplicity.
\begin{proof} Suppose $a$ is a non-zero eigenvalue of $AA^*$ and $x$ is an eigenvector of $AA^*$ pertaining to $a$: $AA^*x = ax$. Applying $A^*$ to both sides, we get $A^*A(A^*x)=aA^*x$. Since $a\ne 0$ and $x\ne 0$, $A^*x\ne 0$ by $AA^*x = ax$. Therefore, $a$ is an eigenvalue of $A^*A$ with $A^*x$ an eigenvector of $A^*A$ pertaining to $a$. By symmetry, we conclude $AA^*$ and $A^*A$ have the same set of non-zero eigenvalues.

Fix a non-zero eigenvalue $a$, and suppose $x_1$, $\cdots$, $x_m$ is a basis for the space of generalized eigenvectors of $AA^*$ pertaining to $a$. Since $a\ne 0$, we can claim $A^*x_1$, $\cdots$, $A^*x_m$ are linearly independent. Indeed, assume not, then there must exist $\alpha_1$, $\cdots$, $\alpha_m$ not all equal to 0, such that $\sum_{i=1}^m\alpha_iA^*x_i=0$. This implies $a(\sum_{i=1}^m\alpha_ix_i)=\sum_{i=1}^m\alpha_iAA^*x_i=A(\sum_{i=1}^m\alpha_iA^*x_i)=0$, which further implies $x_1$, $\cdots$, $x_m$ are linearly dependent since $a\ne 0$. Contradiction.

This shows the dimension of the space of generalized eigenvectors of $AA^*$ pertaining to $a$ is no greater than that of the space of generalized eigenvectors of $A^*A$ pertaining to $a$. By symmetry, we conclude the spaces of generalized eigenvectors of $AA^*$ and $A^*A$ pertaining to the same nonzero eigenvalue have the same dimension. Combined, we can conclude $AA^*$ and $A^*A$ have the same non-zero eigenvalues with the same (algebraic) multiplicity. \end{proof}
\begin{remark}
The {\it multiplicity} referred to in this problem is understood as algebraic multiplicity, which is equal to the dimension of the space of generalized eigenvectors.
\end{remark}

\noindent  $\blacktriangleright$ 15. (page 171) Give an example of a $2\times 2$ matrix $Z$ whose eigenvalues have positive real part but $Z+Z^*$ is not positive.
\begin{solution}Let $Z = \left(\begin{matrix}1+bi & 3 \\ 0 & 1+bi \end{matrix}\right)$ where $b$ could be any real number. Then the eigenvalue of $Z$, $1+bi$, has positive real part. Meanwhile, $Z+Z^*=\left(\begin{matrix}2 & 3 \\ 3 & 2\end{matrix}\right)$ has characteristic polynomial $p(s) = (2-s)^2-9 = (s-5)(s+1)$. So $Z+Z^*$ has eigenvalue $5$ and $-1$, and therefore cannot be positive.\end{solution}

\noindent  $\blacktriangleright$ 16. (page 171) Verify that the commutator (50) of two self-adjoint matrices is anti-self-adjoint.
\begin{proof}Suppose $A$ and $B$ are selfadjoint. Then for any $x$ and $y$,
\[
(x,(AB-BA)^*y)=((AB-BA)x,y)=(ABx,y)-(BAx,y)=(x,BAy)-(x,ABy) = (x,-(AB-BA)y).
\]
So $(AB-BA)^*=-(AB-BA)$.
\end{proof}

\section{Kinematics and Dynamics}

The book's own solution gives answers to Ex 1, 5, 6, 8, 9.

\medskip

\noindent  $\blacktriangleright$ 1. (page 174) Show that if $M(t)$ satisfies a differential equation of form (11), where $A(t)$ is antisymmetric for each $t$ and the initial condition (5), then $M(t)$ is a rotation for every $t$.
\begin{proof}We note $\frac{d}{dt}(M(t)M^*(t))=\dot{M}(t)M^*(t)+M(t)\dot{M}^*(t)=A(t)+A^*(t)=0$. So $M(t)M^*(t)\equiv M(0)M^*(0) =I$. Also, $f(t) =\det M(t)$ is continuous function of $t$ and takes values either 1 or -1 by the isometry property of $M(t)$. Since $f(0)=1$, we have $f(t)\equiv 1$. By Theorem 1, $M(t)$ is a rotation for every $t$. \end{proof}

\noindent  $\blacktriangleright$ 2. (page 174) Suppose that $A$ is independent of $t$; show that the solution of equation (11) satisfying the initial condition (5) is
\[
M(t) = e^{tA}.
\]
\begin{proof}$\lim_{h\to 0}\frac{M(t+h)-M(t)}{h}=\lim_{h\to 0}\frac{e^{tA}(e^{hA}-I)}{h}=Ae^{tA}$, i.e. $\dot{M}(t) = AM(t)$. Clearly $M(0)=I$.\end{proof}

\noindent  $\blacktriangleright$ 3. (page 174) Show that when $A$ depends on $t$, equation (11) is {\it not} solved by
\[
M(t) = e^{\int_0^t A(s)ds},
\]
unless $A(t)$ and $A(s)$ commute for all $s$ and $t$.
\begin{proof}The reason we need commutativity is that the following equation is required in the calculation of derivative:
\begin{eqnarray*}
\frac{1}{h}(M(t+h)-M(t))
&=&\frac{1}{h}\left(e^{\int_0^{t+h}A(s)ds}-e^{\int_0^tA(s)ds}\right)\\
&=&\frac{1}{h}\left(e^{\int_0^tA(s)ds+\int_t^{t+h}A(s)ds}-e^{\int_0^tA(s)ds}\right)\\
&=&\frac{1}{h}e^{\int_0^tA(s)ds}\left(e^{\int_t^{t+h}A(s)ds}-I\right),
\end{eqnarray*}
i.e. $e^{\int_0^tA(s)ds + \int_t^{t+h}A(s)ds}=e^{\int_0^tA(s)ds}e^{\int_t^{t+h}A(s)ds}$. So when this commutativity holds,
\[
\dot{M}(t) = \lim_{h\to 0}\frac{1}{h}(M(t+h)-M(t)) = M(t)A(t).
\]
\end{proof}

\noindent  $\blacktriangleright$ 4. (page 175) Show that if $A$ in (15) is not equal to 0, then all vectors annihilated by $A$ are multiples of (16).
\begin{proof}If $f=(x,y,z)^T$ satisfies $Af=0$, we must have
\begin{eqnarray*}
\begin{cases}
ay + bz = 0\\
-ax+cz=0\\
-bx-cy=0.
\end{cases}
\end{eqnarray*}
By discussing various possibilities ($a,b,c = 0$ or not), we can check $f$ is a multiple of $(-c,b,-a)^T$.
\end{proof}

\noindent  $\blacktriangleright$ 5. (page 175) Show that the two other eigenvalues of $A$ are $\pm i\sqrt{a^2 + b^2 + c^2}$.
\begin{proof}
\[
\det (\lambda I-A) = \det \left(\begin{matrix}\lambda & -a & -b \\ a & \lambda & -c \\ b & c & \lambda\end{matrix}\right) = \lambda(\lambda^2+c^2)-a(-a\lambda + bc) + b(ac+b\lambda) = \lambda^3+\lambda(c^2+b^2+a^2).
\]
Solving it gives us the other two eigenvalues.
\end{proof}

\noindent  $\blacktriangleright$ 6. (page 176) Show that the motion $M(t)$ described by (12) is a rotation around the axis through the vector $f$ given by formula (16). Show that the angle of rotation is $t\sqrt{a^2 + b^2 + c^2}$. ({\it Hint}: Use formula $(4)'$.)
\begin{proof} Since $A=\left(\begin{matrix}0 & a & b \\ -a & 0 & c \\ -b & -c & 0\end{matrix}\right)$ is anti-symmetric, $M(t)M^*(t)=e^{tA}e^{tA^*}=e^{tA}e^{-tA}=I$. By Exercise 7 of Chapter 9, all eigenvalues of $e^{At}$ have the form $e^{at}$, where $a$ is an eigenvalue of $A$. Since the eigenvalues of $A$ are $0$ and $\pm ik$ with $k=\sqrt{a^2+b^2+c^2}$ (Exercise 5), the eigenvalues of $e^{At}$ are 1 and $e^{\pm ikt}$. This implies $\det e^{At}=1\cdot e^{ikt} \cdot e^{-ikt}=1$. By Theorem 1, $M=e^{At}$ is a rotation. Let $f$ be given by formula (16). From $Af=0$ we deduce that $e^{At}f=f$; thus $f$ is the axis of the rotation $e^{At}$. The trace of $e^{At}$ is $1+e^{ikt}+e^{-ikt}=2\cos kt + 1$. According to formula $(4)'$, the angle of rotation $\theta$ of $e^{At}$ satisfies $2\cos\theta+1=\mbox{tr}e^{At}$. This shows that $\theta=kt=t\sqrt{a^2+b^2+c^2}$.  \end{proof}

\noindent  $\blacktriangleright$ 7. (page 177) Show that the commutator
\[
[A, B] = AB - BA
\]
of two antisymmetric matrices is antisymmetric.
\begin{proof}$(AB-BA)^*=(AB)^*-(BA)^*=B^*A^*-A^*B^* = (-B)(-A)-(-A)(-B)=BA-AB = -(AB-BA)$. \end{proof}

\noindent  $\blacktriangleright$ 8. (page 177) Let $A$ denote the $3\times 3$ matrix (15); we denote the associated null vector (16) by $f_A$. Obviously, $f$ depends linearly on $A$.

(a) Let $A$ and $B$ denote two $3\times 3$ antisymmetric matrices. Show that
\[
\text{tr} AB = -2 (f_A, f_B),
\]
where $(,)$ denotes the standard scalar product for vectors in $\mathbb R^3$.
\begin{proof} See the solution in the textbook, on page 294. \end{proof}

\noindent  $\blacktriangleright$ 9. (page 177) Show that the cross product can be expressed as
\[
f_{[A,B]} = f_A \times f_B.
\]
\begin{proof} See the solution in the textbook, page 294. \end{proof}

\noindent  $\blacktriangleright$ 10. (page 184) Verify that solutions of the form (36) form a $2n$-dimensional linear space.
\begin{proof} It suffices to note that the set of 2n functions, $\{(\cos c_jt)h_j, (\sin c_jt)h_j\}_{j=1}^n$, are linearly independent, since any two of them are orthogonal when their subscripts are distinct.\end{proof}

\section{Convexity}

The book's own solution gives answers to Ex 2, 6, 7, 8, 10, 16, 19, 20.

\bigskip

$\bigstar$ {\bf Comments}:

1) The following results will help us understand some details in the
proofs of Theorem 6 and Theorem 10.

\begin{prop}
Let $S$ be an arbitrary subset of $X$ and $x$ an interior point of
$S$. For any real linear function $l$ defined on $X$, if
$l\not\equiv 0$, then $l(x)$ is an interior point of $\Gamma = l(S)$
in the topological sense.
\end{prop}
\begin{proof}
We can find $y\in X$ so that $l(y)\ne 0$. Then for $t$ sufficiently
small, $l(x)+tl(y)=l(x+ty)\in \Gamma$. So $\Gamma$ contains an
interval which contains $l(x)$, i.e. $l(x)$ is an interior point of
$\Gamma$ under the topology of $\mathbb R^1$.
\end{proof}

\begin{corollary}
If $K$ is an open convex set and $l$ is a linear function with
$l\not \equiv 0$, $\Gamma = l(K)$ is an open interval.
\end{corollary}
\begin{proof}
Note $\Gamma$ is convex and open in $\mathbb R^1$ in the topological
sense.
\end{proof}

\begin{prop}Let $K$ be a convex set and $K_0$ the set of all
interior points of $K$. Then $K_0$ is convex and open.
\end{prop}
\begin{proof}
(Convexity) $\forall x, y \in K_0$ and $a\in [0,1]$. For any $z\in
X$, $[ax+(1-a)y]+tz=a(x+tz)+(1-a)(y+tz)\in K$ when $t$ is
sufficiently small, since $x,y$ are interior points of $K$ and $K$
is convex.

(Openness) Fix $x \in K_0$, $\forall y_1 \in X$. We need to show for
$t$ sufficiently small, $x+ty_1\in K_0$. Indeed, $\forall y_2\in X$,
we can find a common $\varepsilon>0$, so that whenever $(t_1,t_2)\in
[-\varepsilon,\varepsilon]\times [-\varepsilon,\varepsilon]$,
$x+t_1y_1\in K$ and $x+t_2y_2\in K$. Fix any $t^* \in
[-\frac{\varepsilon}{2}, \frac{\varepsilon}{2}]$, by the convexity
of $K$,
$x+t^*y_1+t^{**}y_2=\frac{1}{2}(x+2t^*y_1)+\frac{1}{2}(x+2t^{**}y_2)
\in K$ when $t^{**} \in [-\frac{\varepsilon}{2},
\frac{\varepsilon}{2}]$. This shows $x+t^*y_1\in K_0$. Since $t^*$
is arbitrarily chosen from $[-\frac{\varepsilon}{2},
\frac{\varepsilon}{2}]$, we conclude for $t$ sufficiently small,
$x+ty_1 \in K_0$. That is, $x$ is an interior point of $K_0$. By the
arbitrariness of $x$, $K_0$ is open.
\end{proof}

2) Regarding Theorem 10 (Carath\'{e}odory): (i) Among all the three conditions, ``convexity'' is the essential one; ``closedness'' and ``boundedness'' are to guarantee $K$ has extreme points. (ii) Solution to Exercise 14 may help us understand the proof of Theorem 10. When a convex set has no interior points, it's often useful to realize that the dimension can be reduced by 1. (iii) To understand ``... then all points $x$ on the open segment bounded by $x_0$ and $x_1$ are interior points of $K$'', we note if this is not true, then we can find $y$ such that for all $t>0$ or $t<0$, $x+ty \not\in K$. Without loss of generality, assume $x+ty\not \in K$, $\forall t>0$. For $t$ sufficiently small, $x_0 + ty \in K$. So the segment $[x_1, x_0+ty] \subset K$. But this necessarily intersects with the ray $x+ty$, $t>0$. A contradiction. (iv) We can summarize the idea of the proof as follows. One dimension is clear, so by using induction, we have two scenarios. Scenario one, $K$ has no interior points. Then the dimension is reduced by 1 and we are done. Scenario two, $K$ has interior points. Then intuition shows any interior point lies on a segment with one endpoint an extreme point and the other a boundary point; a boundary point resides on a hyperplane, whose dimension is reduced by 1. By induction, we are done.

\bigskip

\noindent  $\blacktriangleright$ 1. (page 188) Verify that these are convex sets. \begin{proof}The verification is straightforward and we omit it.\end{proof}

\noindent  $\blacktriangleright$ 2. (page 188) Prove these propositions. \begin{proof}These propositions are immediate consequences of the definition of convexity. \end{proof}

\noindent  $\blacktriangleright$ 3. (page 188) Show that an open half-space (3) is an open convex set.
\begin{proof}Fix a point $x\in \{z:l(z)<c\}$. For any $y\in X$, $f(t)=l(x+ty)=l(x)+tl(y)$ is a continuous function of $t$, with $f(0)=l(x)<c$. By continuity, $f(t)<c$ for $t$ sufficiently small. So $x+ty \in \{z:l(z)<c\}$ for $t$ sufficiently small, i.e. $x$ is an interior point. Since $x$ is arbitrarily chosen, we have proved $\{z:l(z)<c\}$ is open.\end{proof}

\noindent  $\blacktriangleright$ 4. (page 188) Show that if $A$ is an open convex set and $B$ is convex, then $A+B$ is open and convex.
\begin{proof}The convexity of $A+B$ is Theorem 1(b). To see the openness, $\forall x\in A$, $y\in B$. For any $z\in X$, $(x+y)+tz = (x+tz)+y$. For $t$ sufficiently small, $x+tz\in A$. So $(x+y)+tz \in A+B$ for $t$ sufficiently small. This shows $A+B$ is open. \end{proof}

\noindent  $\blacktriangleright$ 5. (page 188) Let $X$ be a Euclidean space, and let $K$ be the open ball of radius $a$ centered at the origin: $|\!| x |\!| < a$.

(i) Show that $K$ is a convex set.

(ii) Show that the gauge function of $K$ is $p(x) = |\!|x|\!|/a$.
\begin{proof}That $K$ is a convex set is trivial to see. It's also clear that $p(0)=0$. For any $x\in\mathbb R^n\setminus\{0\}$, when $\varepsilon\in (0,a)$, $r_{\varepsilon}=\frac{|\!|x|\!|}{a-\varepsilon}$ satisfies $r_{\varepsilon}>0$ and $x/r_{\varepsilon}\in K$. So $p(x)\le \frac{|\!|x|\!|}{a-\varepsilon}$. By letting $\varepsilon\to 0$, we conclude $p(x)\le |\!|x|\!|/a$. If $``<"$ holds, we can find $r>0$ such that $r<\frac{|\!|x|\!|}{a}$ and $\frac{x}{r}\in K$. But $r<\frac{|\!|x|\!|}{a}$ implies $a<\frac{|\!|x|\!|}{r}$ and hence $\frac{x}{r}\not \in K$. Contradiction. Combined, we conclude $p(x)=\frac{|\!|x|\!|}{a}$. \end{proof}

\noindent  $\blacktriangleright$ 6. (page 188) In the $(u,v)$ plane take $K$ to be the quarter-plane $u<1$, $v<1$. Show that the gauge function of $K$ is
\[
p(u,v) = \begin{cases}
\begin{array}{cc}
  0 & \text{if $u\le 0$, $v\le 0$,} \\
  v & \text{if $0<v$, $u\le 0$,} \\
  u & \text{if $0<u$, $v\le 0$,} \\
  \max(u,v) & \text{if $0<u$, $0<v$.}
\end{array}
\end{cases}
\]
\begin{proof}See the textbook's solution. \end{proof}

\noindent  $\blacktriangleright$ 7. (page 190) Let $p$ be a positive homogeneous, subadditive function. Prove that the set $K$ consisting of all $x$ for which $p(x)<1$ is convex and open.
\begin{proof}$\forall x, y \in K$, we have $p(x)<1$ and $p(y)<1$. For any $a\in [0,1]$, $p(ax+(1-a)y)\le p(ax)+p((1-a)y) = ap(x) +(1-a)p(y)<a+(1-a)=1$. This shows $K$ is convex. To see $K$ is open, fix $x\in K$ and choose any $z\in X$. Then $p(x+tz)\le p(x)+tp(z)$. So for $t$ sufficiently small such that $tp(z)<1-p(x)$, we have $p(x+tz)\le p(x)+tp(z) < p(x) + 1- p(x)=1$, i.e. $x+tz\in K$. This shows $K$ is open. \end{proof}

\noindent  $\blacktriangleright$ 8. (page 193) Prove that the support function $q_S$ of any set is {\it subadditive}; that is, it satisfies $q_S(m+l) \le q_S(m) + q_S(l)$ for all $l$, $m$ in $X'$.
\begin{proof}$\forall \varepsilon >0$, there exists $x(\varepsilon)\in S$, so that
\[
q_S(m+l)=\sup_{x\in S}(l+m)(x) < (l+m)(x(\varepsilon))+\varepsilon \le \sup_{x\in S}l(x) + \sup_{x\in S}m(x) + \varepsilon = q_S(m)+q_S(l)+\varepsilon.\]
 By the arbitrariness of $\varepsilon$, we conclude $q_S(m+l)\le q_S(m)+q_S(l)$.\end{proof}

\noindent  $\blacktriangleright$ 9. (page 193) Let $S$ and $T$ be arbitrary sets in $X$. Prove that $q_{S+T}(l)=q_S(l)+q_T(l)$.
\begin{proof}$q_{S+T}(l)=\sup_{x\in S, y\in T}l(x+y)=\sup_{x\in S, y\in T}[l(x)+l(y)]\le \sup_{x\in S, y\in T}[q_S(l)+q_T(l)]=q_S(l)+q_T(l)$. Conversely, $\forall \varepsilon>0$, there exists $x_0\in S$, $y_0\in T$, s.t. $q_S(l) < l(x_0) + \frac{\varepsilon}{2}$, $q_T(l)<l(y_0)+\frac{\varepsilon}{2}$. So $q_S(l)+q_T(l) < l(x_0+y_0) + \varepsilon \le q_{S+T}(l)+\varepsilon$. By the arbitrariness of $\varepsilon$, $q_S(l)+q_T(l)\le q_{S+T}(l)$. Combined, we get $q_{S+T}(l)=q_S(l)+q_T(l)$.\end{proof}

\noindent  $\blacktriangleright$ 10. (page 193) Show that $q_{S\cup T}(l)=\max\{q_S(l), q_T(l)\}$.
\begin{proof} $q_{S\cup T}(l)=\sup_{x\in S \cup T}l(x)\ge \sup_{x\in S}l(x)=q_S(l)$. Similarly, $q_{S\cup T}(l)\ge q_T(l)$. Therefore, we have $q_{S\cup T}(l)\ge \max\{q_S(l),q_T(l)\}$. For any $\varepsilon >0$ sufficiently small, we can find $x_{\varepsilon}\in S\cup T$, such that $q_{S\cup T}(l)\le l(x_{\varepsilon})+\varepsilon$. But $l(x_{\varepsilon})\le \max\{q_S(l),q_T(l)\}$. So $q_{S\cup T}(l)\le \max\{q_S(l),q_T(l)\} + \varepsilon$. Let $\varepsilon\to 0$, we get $q_{S\cup T}(l)\le \max\{q_S(l),q_T(l)\}$. Combined, we can conclude $q_{S\cup T}(l)= \max\{q_S(l),q_T(l)\}$.\end{proof}

\noindent  $\blacktriangleright$ 11. (page 194) Show that a closed half-space as defined by (4) is a closed convex set.
\begin{proof}If for any $a\in (0,1)$, $l(ax+(1-a)y)=al(x)+ (1-a)l(y)\le c$, by continuity, we have $l(x)\le c$ and $l(y)\le c$. This shows $\{x:l(x)\le c\}$ is a closed convex set. \end{proof}

\noindent  $\blacktriangleright$ 12. (page 194) Show that the closed unit ball in Euclidean space, consisting of all points $|\!| x|\!| \le 1$, is a closed convex set.
\begin{proof}
Convexity is obvious. For closedness, note $f(t) = |\!|tx+(1-t)y|\!|$ is a continuous function of $t$. So if $f(t) \le 1$ for any $t\in (0,1)$, then by continuity $f(0)= |\!|y|\!|\le 1$ and $f(1)=|\!|x|\!| \le 1$. So the unit ball $B(0,1)$ is closed. Combined, we conclude $B(0,1)=\{x: |\!|x|\!|\le 1\}$ is a closed convex set.
\end{proof}

\noindent  $\blacktriangleright$ 13. (page 194) Show that the intersection of closed convex sets is a closed convex set.
\begin{proof}Suppose $H$ and $K$ are closed convex sets. Theorem 1(a) says $H\cap K$ is also convex. Moreover, if for any $a\in (0,1)$, $ax+(1-a)y\in H\cap K$, then the closedness of $H$ and $K$ implies $x,y\in H$ and $x,y \in K$, i.e. $x,y\in H\cap K$. So $H\cap K$ is closed.\end{proof}

\noindent  $\blacktriangleright$ 14. (page 194) Complete the proof of Theorems 7 and 8.
\begin{proof}{\bf Proof of Theorem 7}: Suppose $K$ has an interior point $x_0$. If a linear function $l$ and a real number $c$ determine a closed half-space that contains $K-x_0$ but not $y-x_0$, i.e. $l(x-x_0)\le c$, $\forall x\in K$ and $l(y-x_0)>c$, then $l$ and $c+l(x_0)$ determine a closed half-space that contains $K$ but not $y$, i.e. $l(x)\le c+l(x_0)$ and $l(y) > c+l(x_0)$. So without loss of generality, we can assume $x_0=0$. Note the convexity and closedness are preserved under translation, so this simplification is all right for this problem's purpose.

Define gauge function $p_K$ as in (5). Then we can show $p_K(x)\le 1$ if and only if $x\in K$. Indeed, if $x\in K$, then $p_K(x)\le 1$ by definition. Conversely, if $p_K(x)\le 1$, then for any $\varepsilon >0$, there exists $r(\varepsilon) < 1+ \varepsilon$ so that $\frac{x}{r(\varepsilon)}\in K$. We choose $r(\varepsilon)>1$ and note $\frac{x}{r(\varepsilon)}=a(\varepsilon)\cdot 0 + (1-a(\varepsilon))\cdot x$ with $a(\varepsilon) = 1-\frac{1}{r(\varepsilon)}$. As $r(\varepsilon)$ can be as close to 1 as we want when $\varepsilon\downarrow 0$, $a(\varepsilon)$ can be as close to 0 as we want. Meanwhile, 0 is an interior point of $K$, so for $r$ large enough, $\frac{x}{r}\in K$. This shows for $a$ close enough to 1, $a\cdot 0 + (1-a)\cdot x\in K$. Combined, we conclude $K$ contains the open segment $\{a\cdot 0+(1-a)\cdot x:0<a<1\}$. By definition of closedness, $x\in K$. The rest of the proof is completely analogous to that of Theorem 3, with $p(x)<1$ replaced by $p(x)\le 1$.

If $K$ has no interior point, we have two possibilities. Case one, $y$ and $K$ are not on the same hyperplane. In this case, there exists a linear function $l$ and a real number $c$, such that $l(x)=c (\forall x\in K)$ but $l(y)\ne c$. By considering $-l$ if necessary, we can have $l(x)=c (\forall x \in K)$ and $l(y)>c$. So the half-space $\{x:l(x)\le c\}$ contains $K$, but not $y$. Case two, $y$ and $K$ reside on the same hyperplane. Then the dimension of the ambient space for $y$ and $K$ can be reduced by 1. Work by induction and note the space is of finite dimension, we can finish the proof.

{\bf Proof of Theorem 8}: By definition of (16), if $x\in K$, then $l(x)\le q_K(l)$. Conversely, suppose $y$ is not in $K$, then there exists $l\in X'$ and a real number $c$ such that $l(x)\le c$, $\forall x\in K$ and $l(y)>c$. This implies $l(y)>q_K(l)$. Combined, we conclude $x\in K$ if and only if $l(x)\le q_K(l)$, $\forall l\in X'$.

{\it Remark:} From the above solution and the proof of Theorem 3, we can see a useful routine for proving results on convex sets: first assume the convex set has an interior point and  use the gauge function, which often helps to construct the desired linear functionals via Hahn-Banach Theorem. If there exists no interior point, reduce the dimension by 1 and work by induction. Such a use of interior points as the criterion for a dichotomy is also present in the proof of Theorem 10 (Carath\'{e}odory).
 \end{proof}

\noindent  $\blacktriangleright$ 15. (page 195) Prove Theorem 9.
\begin{proof}Denote by $\widehat S$ the closed convex hull of $S$, and define $\Gamma_l = \{x:l(x)\le q_S(l)\}$ where $l\in X'$. Then it is easy to see each $\Gamma_l$ is a closed convex set containing $S$, so $\widehat S\subseteq \cap_{l\in X'}\Gamma_l$. For the other direction, suppose $\cap_{l\in X'}\Gamma_l \setminus \widehat S\ne \emptyset$ and we choose a point $x$ from $\cap_{l\in X'}\Gamma_l \setminus \widehat S$. By Theorem 8, there exists $l_0\in X'$ such that $l_0(x)>q_{\widehat S}(l_0)\ge q_S(l_0)$. So $x\not \in \Gamma_{l_0}$, contradiction. Combined, we conclude $\widehat S = \cap_{l\in X'}\Gamma_l = \{x:l(x)\le q_S(l), \forall l \in X'\}$.  \end{proof}

\noindent  $\blacktriangleright$ 16. (page 195) Show that if $x_1$, $\cdots$, $x_m$ belong to a convex set, then so does any convex combination of them.
\begin{proof}Suppose $\lambda_1$, $\cdots$, $\lambda_m$ satisfy $\lambda_1$, $\cdots$, $\lambda_m \in (0,1)$ and $\sum_{i=1}^m\lambda_i=1$. We need to show $\sum_{i=1}^{m}\lambda_ix_i\in K$, where $K$ is the convex set to which $x_1$, $\cdots$, $x_m$ belong. Indeed, since $\sum_{i=1}^m\lambda_ix_i=(\lambda_1+\cdots+\lambda_{m-1})\sum_{i=1}^{m-1}\frac{\lambda_i}{\lambda_1+\cdots+\lambda_{m-1}}x_i+\lambda_mx_m$, it suffices to show $\sum_{i=1}^{m-1}\frac{\lambda_i}{\lambda_1+\cdots+\lambda_{m-1}}x_i\in K$. Working by induction, we are done. \end{proof}

\noindent  $\blacktriangleright$ 17. (page 195) Show that an interior point of $K$ cannot be an extreme point.
\begin{proof}
Suppose $x$ is an interior point of $K$. $\forall y \in X$, for $t$ sufficiently small, $x+ty\in K$. In particular, we can find $\varepsilon >0$ so that $x+\varepsilon y \in K$ and $x-\varepsilon y \in K$. Since $x$ can be represented as $x=\frac{(x+\varepsilon y)+(x-\varepsilon y)}{2}$, we conclude $x$ is not an extreme point.
\end{proof}

 \noindent  $\blacktriangleright$ 18. (page 197) Verify that every permutation matrix is a doubly stochastic matrix.
 \begin{proof}
 Let $S$ be a permutation matrix as defined in formula (25). Then clearly $S_{ij}\ge 0$. Furthermore, $\sum_{i=1}^nS_{ij} = \sum_{i=1}^n\delta_{p(i)j}$, where $j$ is fixed and is equal to $p(i_0)$ for some $i_0$. So $\sum_{i=1}^nS_{ij}=1$. Finally, $\sum_{j=1}^nS_{ij} = \sum_{j=1}^n\delta_{ip^{-1}(j)}$, where $i$ is fixed and is equal to $p^{-1}(j_0)$ for some $j_0$. So $\sum_{j=1}^nS_{ij}=1$. Combined, we conclude $S$ is a doubly stochastic matrix.
 \end{proof}

\noindent  $\blacktriangleright$ 19. (page 199) Show that, except for two dimensions, the representation of doubly stochastic matrices as convex combinations of permutation matrices is not unique.
\begin{proof} The textbook's solution demonstrates the case of dimension 3. Counterexamples for higher dimensions can be obtained by building permutation matrices upon the case of dimension 3. \end{proof}

\noindent  $\blacktriangleright$ 20. (page 201) Show that if a convex set in a finite-dimensional Euclidean space is open, or closed, or bounded in the linear sense defined above, then it is open, or closed, or bounded in the topological sense, and conversely.

\begin{proof}

Suppose $K$ is a convex subset of an $n$-dimensional linear space $X$. We have the following properties.

 (1) If $x$ is an interior point of $K$ in the linear sense, then $x$ is an interior point of $K$ in the topological sense. Consequently, being open in the linear sense is the same as being open in the topological sense.

 Indeed, let $e_1$, $\cdots$, $e_n$ be a basis of $X$. There exists $\varepsilon >0$ so that for any $t_i\in (-\varepsilon, \varepsilon)$, $x+t_ie_i\in K$, $i=1,\cdots, n$. For any $y \in X$ which is close enough to $x$, the norm of $y-x$ can be very small so that if we write $y$ as $y=x+\sum_{i=1}^na_ie_i$, $|a_i|<\frac{\varepsilon}{n}$. Since for $t_i\in (-\frac{\varepsilon}{n}, \frac{\varepsilon}{n})$ $(i=1,\cdots, n)$, $x+\sum_{i=1}^nt_ie_i = \sum_{i=1}^n\frac{1}{n}(x+nt_ie_i)\in K$ by the convexity of $K$, we conclude $y\in K$ if $y$ is sufficiently close to $x$. This shows $x$ is an interior point of $K$.


 (2) If $K$ is closed in the linear sense, it is closed in the topological sense.

 Indeed, suppose $(x_k)_{k=1}^{\infty}\subseteq K$ and $x_k\to x$ in the topological sense, we need to show $x\in K$. We work by induction. The case $n=1$ is trivial, because $x$ is necessarily an endpoint of a segment contained in $K$. Assume the property is true for any $n\le N$. For $n= N+1$, we have two cases to consider. Case one, $K$ has no interior points. Then as argued in the proof of Theorem 10, $K$ is contained in a subspace of $X$ with dimension less than $n$. By induction, $K$ is closed in the topological sense and hence $x\in K$. Case two, $K$ has at least one interior point $x_0$. In this case, all the points on the open segment $(x_0,x)$ must be in $K$. Indeed, assume not, then there exists an $x^*\in (x_0,x)$ such that the open segment $(x_0,x^*)\subseteq K$, but $(x^*,x]\cap K=\emptyset$. Since $x_0$ is an interior point of $K$, we can find $n$ linearly independent vectors $e_1$, $\cdots$, $e_n$ so that $x_0+e_i\in K$, $i=1,\cdots, n$. For any $x_k$ sufficiently close to $x$, the cone with $x_k$ as the vertex and $x_0+e_1, \cdots, x_0+e_n$ as the base necessarily intersects with $(x^*,x]$. So such an $x^*\in (x_0,x)$ with $(x^*,x]\cap K = \emptyset$ does not exist. Therefore $(x_0,x)\subseteq K$ and by definition of being closed in the linear sense, we conclude $x\in K$.


(3) If $K$ is bounded in the linear sense, it is bounded in the topological sense.

Indeed, assume $K$ is not bounded in the topological sense, then we can find a sequence $(x_k)_{k=1}^{\infty}$ such that $|\!|x_k|\!|\to \infty$. We shall show $K$ is not bounded in the linear sense. Indeed, if the dimension $n=1$, this is clearly true. Assume the claim is true for any $n\le N$. For $n=N+1$, we have two cases to consider. Case one, $K$ has no interior points. Then as argued in the proof of Theorem 10, $K$ is contained in a subspace of $X$ with dimension less than $n$. By induction, $K$ is not bounded in the linear sense. Case two, $K$ has at least one interior point $x_0$. Denote by $y_k$ the intersection of the segment $[x_0,x_k]$ with the sphere $S(x_0,1)=\{z:|\!|z-x_0|\!|=1\}$. For $k$ large enough, $y_k$ always exists. Since a sphere in finite-dimensional space is compact, we can assume without loss of generality that $y_k\to y\in S(x_0,1)$. Then by an argument similar to that of part (2) (the argument based on cone), the ray starting with $x_0$ and going through $y$ is contained in $K$. So $K$ is not bounded in the linear sense.

\end{proof}


\section{The Duality Theorem}

The book's own solution gives answers to Ex 3.

\bigskip

$\bigstar$ {\bf Comments}: For a linear equation $Ax=y$ to have solution, it is necessary and sufficient that $y\in R(A)=N(A)^{\perp}$. This observation of duality helps us determine the existence of solution. In optimization theory, if the collection of points satisfying certain constraint is a convex set, we use the hyperplane separation theorem to find and state the necessary and sufficient condition for the existence of solution.

\bigskip

\noindent  $\blacktriangleright$ 1. (page 205) Show that $K$ defined by (5) is a convex set.
\begin{proof} Let $Y=\{y: y=\sum_{j=1}^m p_jy_j, p_j\ge 0\}$. If $y, y'\in Y$, then
\[
ty+(1-t)y'=\sum_{j=1}^m \bigl[tp_j+(1-t)p_j'\bigr]y_j \in Y.
\]
So $Y$ is a convex set.
\end{proof}

\noindent  $\blacktriangleright$ 2. (page 205) Show that if $x \ge z$ and $\xi \ge 0$, then $\xi x \ge \xi z$.
\begin{proof}$\xi x- \xi z= \xi (x-z)\ge 0$.\end{proof}

\noindent  $\blacktriangleright$ 3. (page 208) Show that the sup and inf in Theorem 3 is a maximum and minimum. [{\it Hint}: The sign of equality holds in (21).]
\begin{proof} In the proof of Theorem 3, we already showed that there is an admissible $p^*$ for which $\gamma p^*\ge s$ (formula (21)). Since $S \ge \gamma p^* \ge s \ge S$ by formula (16) and (20), the $\sup$ in Theorem 3 is obtained at $p^*$, hence a maximum. To see the $\inf$ in Theorem 3 is a minimum, note under the condition that there are admissible $p$ and $\xi$, Theorem 3 can be written as
\[
\sup\{\gamma p: y\ge Yp, p\ge 0\}=\inf \{\xi y: \gamma\le \xi Y, \xi\ge 0\}\ne \infty.
\]
This is equivalent to
\[
\inf \{(-\gamma)p: (-y)\le (-Y)p, p\ge 0\} = \sup\{\xi (-y): (-\gamma)\ge \xi (-Y), \xi \ge 0\}.
\]
By previous argument for $S=\sup_{\gamma}\gamma p$, we can find $\xi^*$ such that $\xi^*\ge 0$, $(-\gamma)\ge \xi^*(-Y)$ and $\xi^*(-y)=\sup\{\xi(-y): (-\gamma)\ge \xi(-Y),\xi \ge 0\}$, i.e. $\xi^*\ge 0$, $\gamma\le \xi^*Y$, and $\xi^*y = \inf\{\xi y: \gamma \le \xi Y, \xi \ge 0\}$. That is, the $\inf$ in Theorem 3 is obtained at $\xi^*$, hence a minimum.
\end{proof}


\section{Normed Linear Spaces}

The book's own solution gives answers to Ex 2, 5, 6.

\bigskip

$\bigstar$ {\bf Comments}: The geometric intuition of Theorem 9 is clear if we identify $X'$ with $X$ and assume $X$ is an inner product space.

\bigskip

\noindent  $\blacktriangleright$ 1. (page 214) (a) Show that the open and closed unit balls are convex.

(b) Show that the open and closed unit balls are symmetric with respect to the origin, that is, if $x$ belongs to the unit ball, so does $-x$.
\begin{proof}Trivial and proof is omitted.\end{proof}

\noindent  $\blacktriangleright$ 2. (page 215) Prove the {\it triangle inequality}, that is, for all $x$, $y$, $z$ in $X$,
\[
|x-z| \le |x-y| + |y-z|.
\]
\begin{proof}$|x-z|=|(x-y)+(y-z)|\le |x-y|+|y-z|$.\end{proof}

\noindent  $\blacktriangleright$ 3. (page 215) Prove that $|x|_1$ defined by (5) has all three properties (1) of a norm. \begin{proof}(i) $|x|_1\ge 0$, and $|x|_1=0$ if and only if each $a_j=0$, i.e. $x=0$.

(ii) $|x+y|_1=\sum |x_j+y_j|\le \sum |x_j| + \sum |y_j| = |x|_1 + |y|_1$.

(iii) $|kx|_1=\sum |kx_j| = \sum |k||x_j| = |k||x|_1$.
\end{proof}

\noindent  $\blacktriangleright$ 4. (page 216) Prove or look up a proof of H\"{o}lder's inequality.
\begin{proof} $f(x)=-\ln x$ is a strictly convex function on $(0,\infty)$. So for any $a,b>0$ with $a\ne b$, we have
\[
f(\theta a+(1-\theta)b)\le \theta f(a) + (1-\theta)f(b), \; \forall \theta \in [0,1],
\]
where ``=" holds if and only if $\theta a + (1-\theta) b = a$ or $b$. That is, one of the following three cases occurs: 1) $\theta=0$; 2) $\theta=1$; 3) $a=b$.

We note inequality $f(\theta a+(1-\theta)b)\le \theta f(a) + (1-\theta)f(b)$ is equivalent to $a^{\theta}b^{1-\theta}\le \theta a +(1-\theta)b$, and by letting $a_i = \frac{|x_i|^p}{|x|_p^p}$ and $b_i =\frac{|y_i|^q}{|y|_q^q}$, we have $(\theta =\frac{1}{p})$
\[
\frac{|x_iy_i|}{|x|_p|y|_q}\le \frac{1}{p}\frac{|x_i|^p}{|x|_p^p}+\frac{1}{q}\frac{|y_i|^q}{|y|_q^q}.
\]
Taking summation gives $\sum_i |x_iy_i| \le |x|_p |y|_q$.

We consider when $\sum_i |x_iy_i| = |x|_p |y|_q$. Since $p,q$ are real positive numbers and since $\frac{1}{p}+\frac{1}{q}=1$, we must have $p,q \in (1,\infty)$, so that $\theta=\frac{1}{p}\in (0,1)$. So among the three cases aforementioned, ``='' holds in $\sum_i |x_iy_i| \le |x|_p |y|_q$ if and only if for each $i$, $\frac{|x_i|^p}{|x|_p^p}=\frac{|y_i|^q}{|y|_q^q}$, that is, $(|x_1|^p,\cdots, |x_n|^p)$ are proportional to $(|y_1|^q,\cdots, |y_n|^q)$.

For $\sum_ix_iy_i=\sum|x_iy_i|$ to hold, we need $x_iy_i=|x_iy_i|$ for each $i$. This is the same as  $\mbox{sign}(x_i)=\mbox{sign}(y_i)$ for each $i$. In summary, we conclude $xy\le |x|_p|y|_q$ and the ``=" holds if and only if $(|x_1|^p,\cdots, |x_n|^p)$ and $(|y_1|^q,\cdots, |y_n|^q)$ are proportional to each other and $\mbox{sign}(x_i)=\mbox{sign}(y_i)$ $(i=1,\cdots, n)$.
\end{proof}

\noindent  $\blacktriangleright$ 5. (page 216) Prove that
\[
|x|_{\infty} = \lim_{p\to\infty} |x|_p,
\]
where $|x|_{\infty}$ is defined by (3).
\begin{proof}
Given $x$, we note
\[
|x|_p = |x|_{\infty}\left(\sum_{i=1}^n\frac{|x_i|^p}{|x|_{\infty}^p}\right)^{1/p}\;\mbox{and}\; 1 \le \left(\sum_{i=1}^n\frac{|x_i|^p}{|x|_{\infty}^p}\right)^{1/p} \le n^{1/p}.
\]
Letting $p\to\infty$, we can see $|x|_p\to |x|_{\infty}$.
\end{proof}

\noindent  $\blacktriangleright$ 6. (page 219) Prove that every subspace of a finite-dimensional normed linear space is closed.
\begin{proof}
Every linear subspace of a finite-dimensional normed linear space is again a finite-dimensional normed linear space. So the problem is reduced to proving that any finite-dimensional normed linear space is complete, and hence closed as a subset of any normed linear space containing it. Fixing a basis $e_1,\cdots, e_n$, we introduce the following norm: if $x=\sum a_je_j$, $|\!|x|\!|:=(\sum_j a_j^2)^{1/2}$. Then the original norm $|\cdot|$ is equivalent to $|\!|\cdot |\!|$. So $(x_k)_{k=1}^{\infty}$ is a Cauchy sequence under $|\cdot|$ if and only if $\{(a_{k1}, \cdots, a_{kn})\}_{k=1}^{\infty}$ is a Cauchy sequence in $\mathbb C^n$ or $\mathbb R^n$. Here $x_k=\sum_{j=1}^n a_{kj}e_j$. Since $\mathbb C^n$ and $\mathbb R^n$ are complete, we conclude there exists $(b_1,\cdots, b_n) \in \mathbb C^n$ or $\mathbb R^n$, so that $x_k\to x = \sum b_j e_j$ in $|\!|\cdot |\!|$ and hence in $|\cdot|$.
\end{proof}

\noindent  $\blacktriangleright$ 7. (page 221) Show that the infimum in Lemma 5 is a minimum.
\begin{proof}
If $(z_k)_{k=1}^{\infty}\subset Y$ is such that $|x-z_k|\to d:=\inf_{y\in Y}|x-y|$, then for $k$ sufficiently large, $|z_k|\le |z_k-x|+|x|\le 2d + |x|$. Note that $Y=\mbox{span}\{y_1,\cdots, y_n\}$ is a finite dimensional space, by Theorem 3 (ii), $(z_k)_{k=1}^{\infty}$ has a subsequence which converges to a point $y_0\in Y$. Then $\inf_{y\in Y}|x-y|$ is obtained at $y_0$.
\end{proof}

\noindent  $\blacktriangleright$ 8. (page 223) Show that $|l|'$ defined by (23) satisfies all postulates for a norm listed in (1).
\begin{proof}
(i) Positivity: $|\xi|'=0$ implies $\xi x =0$, $\forall x$ with $|x|=1$. So for any $y$ with $y\ne 0$, $\xi y = |y|\xi (y/|y|)=0$, i.e. $\xi \equiv 0$. So $|\xi|'=0$ implies $\xi =0$, which is equivalent to $\xi \ne 0$ implies $|\xi|'>0$. $|0|=0$ is obvious.

(ii) Subadditivity: $|\xi_1+\xi_2|=\sup_{|x|=1}(\xi_1+\xi_2)x\le \sup_{|x|=1}\xi_1x+\sup_{|x|=1}\xi_2x=|\xi_1|+|\xi_2|$.

(iii) Homogeneity: $|k\xi|=\sup_{|x|=1}k\xi x=|k|\sup_{|x|=1}\xi x = |k||\xi|$.
\end{proof}

\noindent  $\blacktriangleright$ 9. (page 228) (i) Show that for all rational $r$,
\[
(rx, y) = r(x,y).
\]

(ii) Show that for all real $k$,
\[
(kx,y) = k(x,y).
\]

(i)\begin{proof} By formula (47) and (48), it suffices to prove the equality for positive rational $r$. Suppose $r=\frac{q}{p}$ with $p, q\in \mathbb Z^+$. By formula (49) and by induction, we have
\[
\stackrel{n}{\overbrace{\left(x,y\right) + \cdots + \left(x,y\right)}} = \left(nx,y\right).
\]
Therefore
\[
p(rx,y)=(prx,y)=(qx,y)=q(x,y),\;\mbox{i.e.}\; (rx,y)=\frac{q}{p}(x,y)=r(x,y).
\]
\end{proof}

(ii)\begin{proof} For any given $k$, we can find a sequence of rational numbers $(r_n)_{n=1}^{\infty}$ such that $r_n\to k$ as $n\to\infty$. Then $k(x,y)=\lim_{n\to\infty} r_n(x,y)= \lim_{n\to \infty} (r_nx,y)= (\lim_{n\to\infty}r_nx,y)=(kx,y)$, where the third ``=" uses the fact that $(\cdot, y)$ defines a continuous linear functional on $X$.\end{proof}

\section{Linear Mappings Between Normed Linear Spaces}

The book's own solution gives answers to Ex 1, 3, 5, 6.

\bigskip

$\bigstar$ {\bf Erratum}: In Exercise 7 (p.236), it should be ``defined by formulas (3) and (5) in Chapter 14" instead of ``defined by formulas (3) and (4) in Chapter 14".
\bigskip

\noindent  $\blacktriangleright$  1. (page 230) Show that every linear map $T: X \to Y$ is continuous, that is, if $\lim x_n = x$, then $\lim Tx_n = Tx$.
\begin{proof}By Lemma 1, $|Tx_n-Tx|=|T(x_n-x)|\le c|x_n-x|$. So if $\lim_{n\to\infty}x_n=x$, then $\lim_{n\to\infty}Tx_n=Tx$.\end{proof}

\noindent  $\blacktriangleright$ 2. (page 235) Show that if for every $x$ in $X$, $|T_nx - Tx|$ tends to zero as $n\to \infty$, then $|T_n - T|$ tends to zero.
\begin{proof} Suppose $|T_n-T|$ does not tend to zero. Then there exists $\varepsilon > 0$ and a sequence $(x_n)_{n=1}^{\infty}$ such that $|x_n|=1$ and $|(T_n-T)x_n|\ge \varepsilon$. By Theorem 3(ii), we can without loss of generality assume $(x_n)_{n=1}^{\infty}$ converges to some point $x^*$. Then
\begin{eqnarray*}
|(T_n-T)x_n|-|(T_n-T)x^*|
&\le& |(T_n-T)x_n - (T_n-T)x^*| \\
&\le& |T(x_n-x^*)| + |T_n(x_n-x^*)| \\
&\le& |T||x_n-x^*| + |T_n||x_n-x^*|.
\end{eqnarray*}
For $n$ sufficiently large, $|(T_n-T)x_n|-|(T_n-T)x^*|$ will be greater than $\varepsilon/2$, while $|T||x_n-x^*| + |T_n||x_n-x^*|$ will be as small as we want, provided that we can prove $(|T_n|)_{n=1}^{\infty}$ is bounded. Indeed, this is the {\it principle of uniform boundedness} (see, for example, Lax \cite{Lax02}, Chapter 10, Theorem 3). Thus we have arrived at a contradiction which shows our assumption is wrong.
\end{proof}
\begin{remark}
Can we find an elementary proof without using the principle of uniform boundedness in functional analysis, especially since we are working with finite dimensional space?
\end{remark}

\noindent  $\blacktriangleright$ 3. (page 235) Show that $T_n = \sum_0^n R^k$ converges to $S^{-1}$ in the sense of definition (16).
\begin{proof}First of all, $\sum_{k=0}^{\infty}R^k$ is well-defined, since by $|R|<1$, $\left(\sum_{k=0}^KR^k\right)_{K=0}^{\infty}$ is a Cauchy sequence in the normed linear space of linear maps of $X$ into itself. Then we note $S\sum_{k=0}^{\infty}R^k=\sum_{k=0}^{\infty}R^k-\sum_{k=1}^{\infty}R^k = I$ and $(\sum_{k=0}^{\infty}R^k)S = \sum_{k=0}^{\infty}R^k-\sum_{k=1}^{\infty}R^k=I$. So $S$ is invertible and $S^{-1}=\sum_{k=0}^{\infty}R^k$. \end{proof}

\noindent  $\blacktriangleright$ 4. (page 235) Deduce Theorem 5 from Theorem 6 by factoring $S=T+S-T$ as $T[I-T^{-1}(S-T)]$.
\begin{proof}
Assume all the conditions in Theorem 5. Define $R=-T^{-1}(S-T)$, then $|R|\le |T^{-1}||S-T|<1$. So by Theorem 6, $I-R = T^{-1}S$ is invertible, hence $S= T\circ T^{-1}S$ is invertible.
\end{proof}

\noindent  $\blacktriangleright$ 5. (page 235) Show that Theorem 6 remains true if the hypothesis (17) is replaced by the following hypothesis. For some positive integer $m$,
\[
|R^m|<1.
\]
\begin{proof} If for some $m$, $|R^m|<1$, we define $U= \sum_{k=0}^{\infty}R^{km}=I+R^mU$. $U$ is well-defined, and the following linear map is also well-defined: $V=U+RU+\cdots+R^{m-1}U$. Then $SV=U+RU+\cdots+R^{m-1}U-(RU+R^2U+\cdots+R^mU)=U-R^mU=I$. This shows $S$ is invertible.
\end{proof}

\noindent  $\blacktriangleright$ 6. (page 235) Take $X=Y=\mathbb R^n$, and $T: X\to X$ the matrix $(t_{ij})$. Take for the norm $|x|$ the maximum norm $|x|_{\infty}$ defined by formula (3) of Chapter 14. Show that the norm $|T|$ of the matrix $(t_{ij})$, regarded as a mapping of $X$ into $X$, is
\[
|T| = \max_i \sum_j |t_{ij}|.
\]
\begin{proof}
For any $x\in \mathbb R^n$, $|Tx|_{\infty}=\max_i|\sum_{j=1}^nt_{ij}x_j|\le \max_i(\sum_{j=1}^n|t_{ij}|)|x|_{\infty}$. So $|T| = \sup_{x\ne 0}\frac{|Tx|_{\infty}}{|x|_{\infty}}\le \max_i\sum_j|t_{ij}|$. For the other direction, suppose $\sum_j|t_{i_0j}| = \max_i\sum_j|t_{ij}|$ and we choose
\[
x^*=(\mbox{sign}(t_{i_01}),\cdots, \mbox{sign}(t_{i_0n}))^T,
 \]
then $|x^*|_{\infty}=1$ and $Tx^*=(\sum_jt_{1j}x_j^*,\cdots, \sum_jt_{i_0j}x_j^*,\cdots, \sum_jt_{nj}x_j^*)^T$. So
\[
|Tx^*|_{\infty} \ge \sum_j|t_{i_0j}| = \max_i\sum_j|t_{ij}|.
\]
This implies $|T|\ge \max_i\sum_j|t_{ij}|$. Combined, we conclude $|T|=\max_i\sum_j|t_{ij}|$.
\end{proof}

\noindent  $\blacktriangleright$ 7. (page 236) Take $X$ to be $\mathbb R^n$ normed by the maximum norm $|x|_{\infty}$, $Y$ to be $\mathbb R^n$ normed by the 1-norm $|x|_1$, defined by formulas (3) and (5) in Chapter 14. Show that the norm of the matrix $(t_{ij})$ regarded as a mapping of $X$ into $Y$ is bounded by
\[
|T| \le \sum_{i,j} |t_{ij}|.
\]
\begin{proof} For any $x=(x_1,\cdots, x_n)'$, we have
\[
|Tx|_1 = \sum_{i=1}^n\left|\sum_{j=1}^nt_{ij}x_j\right|\le \sum_{i=1}^n\sum_{j=1}^n\left|t_{ij}x_j\right| \le \sum_{i,j}|t_{ij}||x|_{\infty}.
\]
So $|T| = \sup_{|x|_{\infty}=1}\frac{|Tx|_{1}}{|x|_{\infty}} \le \sum_{i,j}|t_{ij}|$.
\end{proof}

\noindent  $\blacktriangleright$ 8. (page 236) $X$ is any finite-dimensional normed linear space over $\mathbb C$, and $T$ is a linear mapping of $X$ into $X$. Denote by $t_j$ the eigenvalues of $T$, and denoted by $r(T)$ its spectral radius:
\[
r(T) = \max |t_j|.
\]

(i) Show that $|T| \ge r(T)$.

(ii) Show that $|T^n| \ge r(T)^n$.

(iii) Show, using Theorem 18 of Chapter 7, that
\[
\lim_{n\to \infty} |T^n|^{1/n} = r(T).
\]
\begin{proof} The proof is very similar to the content on page 97, Chapter 7, the material up to Theorem 18. So we omit the proof. \end{proof}

\section{Positive Matrices}

The book's own solution gives answers to Ex 1, 2.

\bigskip

$\bigstar$ {\bf Comments}: To see the property of complex numbers mentioned on p.240, we note if $z_1$, $z_2\in \mathbb C$, then $|z_1+z_2|=|z_1|+|z_2|$ if and only if $\arg z_1=\arg z_2$. For $n\ge 3$, if $|\sum_{i=1}^n z_i|=\sum_{i=1}^n|z_i|$, then
\[
\left|\sum_{i=1}^n z_i \right| \le \left|\sum_{i=3}^n z_i \right| + \left|z_1+z_2 \right| \le \left|\sum_{i=3}^n z_i \right| + |z_1|+|z_2| \le \sum_{i=1}^n |z_i| = \left|\sum_{i=1}^n z_i \right|.
\]
So $|z_1+z_2|=|z_1|+|z_2|$ and hence $\arg z_1 = \arg z_2$. Then we work by induction.

\bigskip

\noindent  $\blacktriangleright$ 1. (page 240) Denote by $t(P)$ the set of nonnegative $\lambda$ such that
\[
Px \le \lambda x, x\ge 0
\]
for some vector $x\ne 0$. Show that the dominant eigenvalue $\lambda(P)$ satisfies
\[
\lambda(P)=\min_{\lambda \in t(P)}\lambda.
\]
\begin{proof}Let $x^*=(1,\cdots,1)^T$ and
$\lambda^*=\max_{1\le i\le n}\sum_{j=1}^np_{ij}$, then $Px^*\le
\lambda^*x^*$. So $t(P)\ne \emptyset$ and $t^*(P)=\{0\le \lambda\le
\lambda^*:\lambda\in t(P)\}$ is a bounded, nonempty set. We show
further $t^*(P)$ is closed. Suppose $(\lambda^m)^{\infty}_{m=1}
\subset t^*(P)$ converges to a point $\lambda$. Denote by $x^m$ the
nonnegative and nonzero vector such that $Px^m \le \lambda^mx^m$.
Without loss of generality, we assume $\sum_{i=1}^nx_i^m=1$. Then
$(x^m)_{m=1}^{\infty}$ is bounded and we can assume $x^m\to x$ for
some $x\ge 0$ with $\sum_{i=1}^nx_i=1$. Passing to limit gives us
$Px\le \lambda x$. Clearly $0\le \lambda\le\lambda^*$. So
$\lambda\in t^*(P)$. This shows $t^*(P)$ is compact and $t(P)$ has a
minimum $\bar\lambda$.

Denote by $\bar x$ the nonzero and nonnegative vector such that
$P\bar x\le \bar\lambda \bar x$. We show we actually have $P\bar
x=\bar\lambda \bar x$. Assume not, then there must exist some $k\in
\{1,\cdots,n\}$ such that $\sum_{j=1}^np_{ij}\bar x_{j} \le
\bar\lambda\bar x_i$ for $i\ne k$ and $\sum_{j=1}^np_{kj}\bar x_j <
\bar\lambda \bar x_k$. Consider the vector $\widehat x=\bar
x-\varepsilon e_k$, where $\varepsilon>0$ and $e_k$ has the $k$-th
component equal to 1 with all the other components zero. Then in the
inequality $P\bar x\le \bar\lambda\bar x$, each component of the LHS is
decreased when $\bar x$ is replaced by $\widehat x$, while only the
$k$-th component of the RHS is decreased, by an amount of $\bar\lambda
\varepsilon$. So for $\varepsilon$ small enough, $P\widehat x
<\bar\lambda\widehat x$, and we can find a $\widehat \lambda < \bar
\lambda$ such that $P\widehat x\le \widehat \lambda \widehat x$.
Note $\bar\lambda >0$ (otherwise $\bar x=0$, a contradiction), so we
can also let $\widehat \lambda >0$. This contradicts
$\bar\lambda =\min_{\lambda \in t(P)}\lambda$. We have shown $\bar
\lambda >0$ is an eigenvalue of $P$ which has a nonzero, nonnegative
eigenvector. By Theorem 1(iv), $\bar\lambda = \lambda (P)$.
\end{proof}

\noindent  $\blacktriangleright$ 2. (page 243) Show that if some power $P^m$ of $P$ is positive, then $P$ has a dominant positive eigenvalue.
\begin{proof}$P^m$ has a dominant positive
eigenvalue $\lambda_0$. By Spectral Mapping Theorem, there is an
eigenvalue $\lambda$ of $P$, such that $\lambda^m=\lambda_0$.
Suppose $\lambda$ is real, then we can further assume $\lambda
>0$ by replacing $\lambda$ with $-\lambda$ if necessary. Then for any other eigenvalue
$\lambda'$ of $P$, Spectral Mapping Theorem implies $(\lambda')^m$
is an eigenvalue of $P^m$. So $|(\lambda')^m| < \lambda_0 =
\lambda^m$, i.e. $|\lambda'|<\lambda$.

To show we can take $\lambda$ as real, denote by $x$ the eigenvector of $P^m$ associated with $\lambda_0$. Then
\[
P^{m}x = \lambda_0x.
\]
Let $P$ act on this relation:
\[
P^{m+1}x = P^m(Px)=\lambda_0Px.
\]
This shows that $Px$ is also an eigenvector of $P^m$ with eigenvalue $\lambda_0$. By Theorem 1(iv), $Px=cx$ for some positive number $c$. Repeated application of $P$ shows that $P^mx=c^mx$. Therefore $c^m=\lambda_0$. Let $\lambda = c$.
\end{proof}

\section{How to Solve Systems of Linear Equations}

$\bigstar$ {\bf Comments}: The three-term recursion formulae
\[
x_{n+1}=(s_nA+p_nI)x_n + q_nx_{n-1} - s_nb
\]
is introduced by Rutishauser et al. \cite{EGRS59}. See Papadrakakis \cite{Papadrakakis82} for a survey on a family of vector iterative methods with three-term recursion formulae and Golub and van Loan \cite{GV96} for a gentle introduction to the Chebyshev semi-iterative method (section \S10.1.5).

\bigskip

\noindent  $\blacktriangleright$ 1. (page 248) Show that $\kappa(A)$ is $\ge 1$.
\begin{proof}$I=AA^{-1}$. So $1=|I|=|AA^{-1}|\le |A||A^{-1}| = \kappa(A)$.
\end{proof}

\noindent  $\blacktriangleright$ 2. (page 254) Suppose $\kappa = 100$, $|\!| e_0 |\!| = 1$, and $(1/\alpha) F(x_0) = 1$; how large do we have to take $N$ in order to make $|\!| e_N |\!| < 10^{-3}$, (a) using the method in Section 1, (b) using the method in Section 2.
\begin{proof}If we use the method of Section 1, we need to solve for $N$ the following inequality: $\frac{2}{\alpha}(1-\frac{1}{\kappa})^NF(x_0)<10^{-3}$. Plugging in the numbers, we get $N>757$. If we use the method of Section 2, we need to solve for $N$ the inequality $2(1+\frac{2}{\sqrt{\kappa}})^{-N}|\!|e_0|\!|<10^{-3}$. Plugging in the numbers, we get $N>42$. The numbers of steps needed in the respective methods differ a great deal. \end{proof}

\noindent  $\blacktriangleright$ 3. (page 261) Write a computer program to evaluate the quantities $s_n$, $p_n$, and $q_n$.
\begin{solution} We first summarize the algorithm. We need to solve the system of linear equations $Ax=b$, where $b$ is a given vector and $A$ is an invertible matrix. We start with an initial guess $x_0$ of the solution and define $r_0 = Ax_0-b$. TO BE CONTINUED ... \end{solution}

\noindent  $\blacktriangleright$ 4. (page 261) Use the computer program to solve a system of equations of your choice.
\begin{solution}
We solve the following problem from the first edition of this book: Use the computer program in Exercise 3 to solve the system of equations
\[
Ax = f, \; A_{ij} = c + \frac{1}{i+j+1}, \; f_i = \frac{1}{i!},
\]
$c$ some nonnegative constant. Vary $c$ between $0$ and $1$, and the order $K$ of the system between 5 and 20. TO BE CONTINUED ... \end{solution}


\section{How to Calculate the Eigenvalues of Self-Adjoint Matrices}

\noindent $\blacktriangleright$ 1. (page 266) Show that the off-diagonal entries of $A_k$ tend to zero as $k$ tends
to $\infty$.
\begin{proof}
Skipped for this version.
\end{proof}

\noindent $\blacktriangleright$ 2. (page 266) Show that the mapping (16) is norm-preserving.
\begin{proof}
Skipped for this version.
\end{proof}

\noindent $\blacktriangleright$ 3. (page 270) (i) Show that $BL-LB$ is a tridiagonal matrix.

(ii) Show that if $L$ satisfies the differential equation (21), its entries satisfy
\begin{eqnarray*}
\frac{d}{dt} a_k = 2 (b_k^2 - b^2_{k-1}), \\
\frac{d}{dt} b_k = b_k (a_{k+1} - a_k),
\end{eqnarray*}
where $k=1, \cdots, n$ and $b_0 = b_n = 0$.

\appendix

\section{Appendix}

\subsection{Special Determinants}

\noindent  $\blacktriangleright$ 1. (page 304) Let
\[
p(s) = x_1 + x_2s + \cdots + x_n s^{n-1}
\]
be a polynomial of degree less than $n$. Let $a_1$, $\cdots$, $a_n$ be $n$ distinct numbers, and let $p_1$, $\cdots$, $p_n$ be $n$ arbitrary complex numbers; we wish to choose the coefficients $x_1$, $\cdots$, $x_n$ so that
\[
p(a_i) = p_i, i=1, \cdots, n.
\]
This is a system of $n$ linear equations for the $n$ coefficients $x_i$. Find the matrix of this system of equations, and show that its determinant is $\ne 0$.
\begin{solution}
The system of equations $p(a_i)=p_i$ ($i=1,\cdots, n$) can be written as
\begin{eqnarray*}
\left(\begin{matrix}
1 & a_1 & \cdots & a_1^{n-1} \\
1 & a_2 & \cdots & a_2^{n-1} \\
\cdots & \cdots & \cdots & \cdots \\
1 & a_n & \cdots & a_n^{n-1}
\end{matrix}\right)\left(\begin{matrix} x_1 \\ x_2 \\ \cdots \\ x_n \end{matrix}\right) = \left(\begin{matrix} p_1 \\ p_2 \\ \cdots \\ p_n \end{matrix}\right)
\end{eqnarray*}
Since $a_1$, $\cdots$, $a_n$ are distinct, by the formula for the determinant of Vandermonde matrix, the determinant of the matrix is equal to $\prod_{j>i}(a_j-a_i)$.
\end{solution}

\noindent  $\blacktriangleright$ 2. (page 304) Find an algebraic formula for the determinant of the matrix whose $ij$th element is
\[
\frac{1}{1+a_ia_j};
\]
here $a_1$, $\cdots$, $a_n$ are arbitrary scalars.
\begin{solution} Denote the matrix by $A$. We claim $\det
A = \frac{\prod_{j>i}(a_j-a_i)^2}{\prod_{i,j}(1+a_ia_j)}$. Indeed,
by subtracting column 1 from each of the other columns, we have
\begin{eqnarray*}
\det A &=& \det \left[
\begin{matrix}
\frac{1}{1+a_1^2} & \frac{1}{1+a_1a_2} & \frac{1}{1+a_1a_3} & \cdots
& \frac{1}{1+a_1a_n} \\
\frac{1}{1+a_2a_1} & \frac{1}{1+a_2^2} & \frac{1}{1+a_2a_3} & \cdots
& \frac{1}{1+a_2a_n} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
\frac{1}{1+a_ia_1} & \frac{1}{1+a_ia_2} & \frac{1}{1+a_ia_3} &
\cdots
& \frac{1}{1+a_ia_n} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
\frac{1}{1+a_na_1} & \frac{1}{1+a_na_2} & \frac{1}{1+a_na_3} &
\cdots & \frac{1}{1+a_n^2}
\end{matrix}\right] \\
&=& \det \left[
\begin{matrix}
\frac{1}{1+a_1^2} & \frac{a_1(a_2-a_1)}{(1+a_1^2)(1+a_1a_2)} &
\frac{a_1(a_3-a_1)}{(1+a_1^2)(1+a_1a_3)} & \cdots
& \frac{a_1(a_n-a_1)}{(1+a_1^2)(1+a_1a_n)} \\
\frac{1}{1+a_2a_1} & \frac{a_2(a_2-a_1)}{(1+a_2a_1)(1+a_2^2)} &
\frac{a_2(a_3-a_1)}{(1+a_2a_1)(1+a_2a_3)} & \cdots
& \frac{a_2(a_n-a_1)}{(1+a_2a_1)(1+a_2a_n)} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
\frac{1}{1+a_ia_1} & \frac{a_i(a_2-a_1)}{(1+a_ia_1)(1+a_ia_2)} &
\frac{a_i(a_3-a_1)}{(1+a_ia_1)(1+a_ia_3)} & \cdots
& \frac{a_i(a_n-a_1)}{(1+a_ia_1)(1+a_ia_n)} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
\frac{1}{1+a_na_1} & \frac{a_n(a_2-a_1)}{(1+a_na_1)(1+a_na_2)} &
\frac{a_n(a_3-a_1)}{(1+a_na_1)(1+a_na_3)} & \cdots &
\frac{a_n(a_n-a_1)}{(1+a_na_1)(1+a_n^2)}
\end{matrix}\right] \\
\end{eqnarray*}
By extracting the common factor $\frac{1}{1+a_ia_1}$ ($i=1,\cdots,
n$) from each row and $(a_j-a_1)$ ($j=2,\cdots,n$) from each column,
we have
\begin{eqnarray*}
\det A &=&
\frac{\prod_{j=2}^n(a_j-a_1)}{\prod_{i=1}^n(1+a_ia_1)}\det \left[
\begin{matrix}
1 & \frac{a_1}{1+a_1a_2} & \frac{a_1}{1+a_1a_3} & \cdots &
\frac{a_1}{1+a_1a_n} \\
1 & \frac{a_2}{1+a_2^2} & \frac{a_2}{1+a_2a_3} & \cdots &
\frac{a_2}{1+a_2a_n} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
1 & \frac{a_i}{1+a_ia_2} & \frac{a_i}{1+a_ia_3} & \cdots &
\frac{a_i}{1+a_ia_n} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
1 & \frac{a_n}{1+a_na_2} & \frac{a_n}{1+a_na_3} & \cdots &
\frac{a_n}{1+a_n^2}
\end{matrix}
\right]
\end{eqnarray*}
Subtracting row 1 from each of the other rows, we get
\[
\det A = \frac{\prod_{j=2}^n(a_j-a_1)}{\prod_{i=1}^n(1+a_ia_1)}\det
\left[
\begin{matrix}
1 & \frac{a_1}{1+a_1a_2} & \frac{a_1}{1+a_1a_3} & \cdots &
\frac{a_1}{1+a_1a_n} \\
0 & \frac{a_2-a_1}{(1+a_1a_2)(1+a_2^2)} &
\frac{a_2-a_1}{(1+a_1a_3)(1+a_2a_3)} & \cdots & \frac{a_2-a_1}{(1+a_1a_n)(1+a_2a_n)} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
0 & \frac{a_i-a_1}{(1+a_1a_2)(1+a_ia_2)} &
\frac{a_i-a_1}{(1+a_1a_3)(1+a_ia_3)} & \cdots & \frac{a_i-a_1}{(1+a_1a_n)(1+a_ia_n)} \\
\cdots & \cdots & \cdots & \cdots & \cdots \\
0 & \frac{a_n-a_1}{(1+a_1a_2)(1+a_na_2)} &
\frac{a_n-a_1}{(1+a_1a_3)(1+a_na_3)} & \cdots &
\frac{a_n-a_1}{(1+a_1a_n)(1+a_n^2)}
\end{matrix}
\right]
\]
By the Laplace expansion and extracting the common factor
$(a_i-a_1)$ from row 2 through $n$ and the common factor
$\frac{1}{1+a_1a_j}$ from column 2 through $n$, we get
\[
\det A =
\frac{\prod_{j=2}^n(a_j-a_1)^2}{\prod_{i=1}^n(1+a_ia_1)\prod_{j=2}^n(1+a_1a_j)}\det
\left[
\begin{matrix} \frac{1}{1+a_2^2} & \frac{1}{1+a_2a_3} & \cdots
& \frac{1}{1+a_2a_n}
\\
\cdots  & \cdots & \cdots & \cdots \\
\frac{1}{1+a_ia_2} & \frac{1}{1+a_ia_3} & \cdots &
\frac{1}{1+a_ia_n} \\
\cdots  & \cdots & \cdots & \cdots \\
\frac{1}{1+a_na_2} & \frac{1}{1+a_na_3} & \cdots & \frac{1}{1+a_n^2}
\end{matrix}
\right]
\]
By induction, we can prove our claim.
\end{solution}

\subsection{The Pfaffian}

\noindent  $\blacktriangleright$ 1. (page 306) Verify by a calculation Cayley's theorem for $n=4$.
\begin{proof}By the Laplace expansion and Exercise 16 of Chapter 5, we have
\begin{eqnarray*}
\det \left[\begin{matrix} 0 & a & b & c \\ -a & 0 & d & e \\ -b & -d & 0 & f \\ -c & -e & -f & 0 \end{matrix}\right]
&=& a\det \left[\begin{matrix}a & b & c \\ -d & 0 & f \\ -e & -f & 0 \end{matrix}\right] - b \det \left[\begin{matrix}a & b & c \\ 0 & d & e \\ -e & -f & 0 \end{matrix}\right] + c \det \left[\begin{matrix}a & b & c \\ 0 & d & e \\ -d & 0 & f\end{matrix}\right] \\
&=& a(-bef+cdf+af^2)-b(-be^2+cde+aef)+c(adf-bde+cd^2) \\
&=& -2abef + 2acdf - 2bcde + a^2f^2 + b^2 e^2 + c^2d^2 \\
&=& (af-be+cd)^2.
\end{eqnarray*}
\end{proof}

\subsection{Symplectic Matrices}

\noindent  $\blacktriangleright$ 1. (page 308) Prove that any real $2n\times 2n$ anti-self-adjoint matrix $A$, $\det A \ne 0$, can be written in the form
\[
A = FJF^T,
\]
$J$ defined by (1), $F$ some real matrix, $\det F \ne 0$.
\begin{proof} We work by induction. For $n=1$, $A$ has the form of $\left[\begin{matrix}0 & a \\ -a & 0\end{matrix}\right]$. Since $\det A\ne 0$, $a\ne 0$. We note
\[
\left[\begin{matrix}1 & 0 \\ 0 & \frac{1}{a} \end{matrix}\right]\left[\begin{matrix}0 & a \\ -a & 0\end{matrix}\right]\left[\begin{matrix}1 & 0 \\ 0 & \frac{1}{a} \end{matrix}\right]= \left[\begin{matrix}0 & 1 \\ -1 & 0 \end{matrix}\right].
\]
Now assume the claim is true for $1$, $\cdots$, $n$, we show it also holds for $n+1$. Indeed, we write $A$ into the form
\[
\left[
\begin{matrix}
0 & a &  *\\
-a & 0 & * \\
* & * & A_1 \\
\end{matrix}
\right],
\]
where $A_1$ is a $2n\times 2n$ anti-self-adjoint matrix. Then
\[
\left[
\begin{matrix}
1 & 0 &  0_{1\times 2n}\\
0 & \frac{1}{a} & 0_{1\times 2n} \\
0_{2n\times 1} & 0_{2n\times 1} & I_{2n\times 2n} \\
\end{matrix}
\right]\left[
\begin{matrix}
0 & a &  *\\
-a & 0 & * \\
* & * & A_1 \\
\end{matrix}
\right]\left[
\begin{matrix}
1 & 0 &  0_{1\times 2n}\\
0 & \frac{1}{a} & 0_{1\times 2n} \\
0_{2n\times 1} & 0_{2n\times 1} & I_{2n\times 2n} \\
\end{matrix}
\right]
=\left[\begin{matrix}
0 & 1 &  *\\
-1 & 0 & * \\
* & * & A_1 \\
\end{matrix}
\right].
\]
Recall that multiplying an elementary matrix from the left is equivalent to an elementary row manipulation, while multiplying an elementary matrix from the right is equivalent to an elementary column manipulation. $A$ being anti-self-adjoint implies $A_{ij}=-A_{ji}$, so we can find a sequence of elementary matrices $U_1, U_2, \cdots, U_k$ such that
\[
U_k\cdots U_2 U_1 \left[\begin{matrix}
0 & 1 &  *\\
-1 & 0 & * \\
* & * & A_1 \\
\end{matrix}
\right] U_1^TU_2^T\cdots U_k^T = \left[\begin{matrix}
0 & 1 & 0\\
-1 & 0 & 0 \\
0 & 0 & A_1 \\
\end{matrix}
\right].
\]
By assumption, $A_1=F_1J_1F_1^T$ for some real matrix $F_1$ with $\det F_1\ne 0$ and $J_1 = \left[\begin{matrix}0_{n\times n} & I_{n\times n}\\ -I_{n\times n} & 0_{n\times n} \end{matrix}\right]$. Therefore ($U:=U_k\cdots U_2 U_1$)
\[
\left[\begin{matrix}I_{2\times 2} & 0 \\ 0 & F_1^{-1}\end{matrix}\right]U \left[\begin{matrix}
0 & 1 &  *\\
-1 & 0 & * \\
* & * & A_1 \\
\end{matrix}
\right]U^T \left[\begin{matrix}I_{2\times 2} & 0 \\ 0 & (F_1^{-1})^T\end{matrix}\right]
= \left[\begin{matrix}
0 & 1 & 0\\
-1 & 0 & 0 \\
0 & 0 & J_1 \\
\end{matrix}
\right].
\]
Define
\[
L=\left[
\begin{matrix}
0 & 1 & 0 & 0 \\
0 & 0 & 0 & I_{n\times n} \\
1 & 0 & 0 & 0 \\
0 & 0 & I_{n\times n} & 0
\end{matrix}
\right]
\]
and
\[
F = \left(L\left[\begin{matrix} I_{2\times 2} & 0  \\ 0 & F_1^{-1}\end{matrix}\right]U\left[
\begin{matrix}
1 & 0 &  0_{1\times 2n}\\
0 & \frac{1}{a} & 0_{1\times 2n} \\
0_{2n\times 1} & 0_{2n\times 1} & I_{2n\times 2n} \\
\end{matrix}
\right]\right)^{-1}.
\]
Then
\begin{eqnarray*}
F^{-1}A(F^{-1})^T &=& L\left[\begin{matrix} I_{2\times 2} & 0  \\ 0 & F_1^{-1}\end{matrix}\right]U\left[
\begin{matrix}
1 & 0 &  0_{1\times 2n}\\
0 & \frac{1}{a} & 0_{1\times 2n} \\
0_{2n\times 1} & 0_{2n\times 1} & I_{2n\times 2n} \\
\end{matrix}
\right]\left[
\begin{matrix}
0 & a &  *\\
-a & 0 & * \\
* & * & A_1 \\
\end{matrix}
\right]
\\
& & \cdot \left[
\begin{matrix}
1 & 0 &  0_{1\times 2n}\\
0 & \frac{1}{a} & 0_{1\times 2n} \\
0_{2n\times 1} & 0_{2n\times 1} & I_{2n\times 2n} \\
\end{matrix}
\right]U^T
\left[\begin{matrix} I_{2\times 2} & 0  \\ 0 & (F_1^{-1})^T\end{matrix}\right]L^T\\
&=&\left[
\begin{matrix}
0 & I_{(n+1)\times (n+1)}\\
-I_{(n+1)\times (n+1)} & 0
\end{matrix}
\right].
\end{eqnarray*}
By induction, we have proved the claim.
\end{proof}

\noindent  $\blacktriangleright$ 2. (page 310) Prove the converse.
\begin{proof} For any given $x$ and $y$, define $f(t)=(S(t)x,JS(t)y)$. Then we have
\begin{eqnarray*}
\frac{d}{dt}f(t)&=&(\frac{d}{dt}S(t)x,JS(t)y)+(S(t)x, J\frac{d}{dt}S(t)y)\\
&=& (G(t)S(t)x, JS(t)y)+(S(t)x, JG(t)S(t)y) \\
&=& (JL(t)S(t)x, JS(t)y)+(S(t)x, J^2L(t)S(t)y)\\
&=&(L(t)S(t)x, J^TJS(t)y)-(S(t)x,L(t)S(t)y) \\
&=&(S(t)x, L(t)S(t)y) - (S(t)x, L(t)S(t)y) \\
&=&0.
\end{eqnarray*}
So $f(t)=f(0)=(S(0)x,JS(0)y)=(x,Jy)$. Since $x$ and $y$ are arbitrary, we conclude $S(t)$ is a family of symplectic matrices.
\end{proof}

\noindent  $\blacktriangleright$  3. (page 311) Prove that plus or minus 1 cannot be an eigenvalue of odd multiplicity of a symplectic matrix.
\begin{proof}
Skipped for this version.
\end{proof}

\noindent  $\blacktriangleright$ 4. (page 312) Verify Theorem 6.
\begin{proof}
We note
\[
\frac{dv}{dt}=\left[\begin{matrix}\sum_{i=1}^{2n}\frac{\partial v_1}{\partial u_i}\frac{du_i}{dt} \\ \cdots \\ \sum_{i=1}^{2n}\frac{\partial v_{2n}}{\partial u_i}\frac{du_i}{dt} \end{matrix}\right] = \frac{\partial v}{\partial u} \frac{du}{dt}= \frac{\partial v}{\partial u}JH_u.
\]
$H(u)$ can be seen as a function of $v$: $K(v)\stackrel{\Delta}{=}H(u(v))$. So
\[
K_v = \left[\begin{matrix}\frac{\partial K}{\partial v_1} \\ \cdots \\ \frac{\partial K}{\partial v_{2n}}\end{matrix}\right]=
\left[
\begin{matrix}
\sum_{i=1}^{2n}\frac{\partial H}{\partial u_i}\frac{\partial u_i}{\partial v_1}\\
\cdots\\
\sum_{i=1}^{2n}\frac{\partial H}{\partial u_i}\frac{\partial u_i}{\partial v_{2n}}
\end{matrix}
\right]
=\left[\begin{matrix}
\frac{\partial u_1}{\partial v_1} & \cdots & \frac{\partial u_{2n}}{\partial v_1} \\
\cdots & \cdots & \cdots \\
\frac{\partial u_1}{\partial v_{2n}} & \cdots & \frac{\partial u_{2n}}{\partial v_{2n}}
\end{matrix}\right]
\left[\begin{matrix}\frac{\partial H}{\partial u_1}\\ \cdots \\ \frac{\partial H}{\partial u_{2n}}\end{matrix}\right]=\left(\frac{\partial u}{\partial v}\right)^TH_u.
\]
Since $\partial v/\partial u$ is symplectic, by Theorem 2, $\partial u/\partial v$ and $(\partial v/\partial u )^T$ are also symplectic. So using formula (4) gives us
\[
\frac{dv}{dt}=\frac{\partial v}{\partial u}J \left(\frac{\partial v}{\partial u}\right)^T\left(\frac{\partial u}{\partial v}\right)^TH_u = J \left(\frac{\partial u}{\partial v}\right)^TH_u = JK_v.
\]
\end{proof}

\subsection{Tensor Product}

\noindent  $\blacktriangleright$ 1. (page 313) Establish a natural isomorphism between tensor products defined with respect to two pairs of distinct bases.
\begin{solution} Skipped for this version.
%Suppose $(e_i)_{i=1}^n$ and $(\hat e_i)_{i=1}^n$ are two distinct bases of $U$, and $(f_j)_{j=1}^m$ and $(\hat f_j)_{j=1}^m$ two distinct bases of %$V$. Suppose $e_i=\sum a_{ik}e_k'$ and $f_j=\sum b_{jk}f_k'$. Then
%\[
%e_i\otimes f_j = sum a_{ik}b_{jl}e_k'\otimes f_l'.
%\]
%So a natural isomorphism between tensor products defined with respect to two pairs of distinct bases is
%\[
%e_i \otimes f_j \mapsto \sum a_{ik}b_{jl}e_k'\otimes f_l'.
%\]
\end{solution}

\noindent  $\blacktriangleright$  2. (page 314) Verify that (4) maps $U\otimes V$ onto ${\cal L}(U', V)$.
\begin{proof} Skipped for this version.
\end{proof}

\noindent  $\blacktriangleright$  3. (page 316) Show that if $\{u_i\}$ and $\{v_j\}$ are linearly independent, so are $u_i \otimes v_i$. Show that $M_{ij}$ is positive.
\begin{proof} Skipped for this version.
\end{proof}

\noindent  $\blacktriangleright$  4. (page 316) Let $u$ be a twice differentiable function of $x_1$, $\cdots$, $x_n$ defined in a neighborhood of a point $p$, where $u$ has a local minimum. Let $(A_{ij})$ be a symmetric, nonnegative matrix. Show that
\[
\sum A_{ij} \frac{\partial^2 u}{\partial x_i \partial x_j}(p) \ge 0.
\]
\begin{proof} Skipped for this version. \end{proof}

\subsection{Lattices}

\noindent  $\blacktriangleright$ 1. (page 318) Show that $a_1$ is a rational number.
\begin{proof} Skipped for this version. \end{proof}

\noindent  $\blacktriangleright$ 2. (page 318) (i) Prove Theorem 2. (ii) Show that unimodular matrices form a group under multiplication.
\begin{proof} Skipped for this version. \end{proof}

\noindent  $\blacktriangleright$ 3. (page 319) Prove Theorem 3.
\begin{proof} Skipped for this version. \end{proof}

\noindent  $\blacktriangleright$  4. (page 319) Show that $L$ is discrete if and only if there is a positive number $d$ such that the ball of radius $d$ centered at the origin contains no other point of $L$.
\begin{proof} Skipped for this version. \end{proof}

\subsection{Fast Matrix Multiplication}

There are no exercise problems for this section. For examples of implementation of Strassen's algorithm, we refer to Huss-Lederman et al. \cite{LJTTJ96} and references therein.

\subsection{Gershgorin's Theorem}

\noindent  $\blacktriangleright$ 1. (page 324) Show that if $C_i$ is disjoint from all the other Gershgorin discs, then $C_i$ contains exactly one eigenvalue of $A$.
 \begin{proof}
Using the notation of the Gershgorin Circle Theorem, let $B(t) = D + tF$, $t\in [0,1]$. The eigenvalues of $B(t)$ are continuous functions of $t$ (Theorem 6 of Chapter 9). For $t=0$, the eigenvalues of $B(0)$ are the diagonal entries of $A$. As $t$ goes from 0 to 1, the radii of the Gershgorin discs corresponding to $B(t)$ become bigger while the centers remain the same. So we can find for each $d_i$ a continuous path $\gamma_i(t)$ such that $\gamma_i(0)=d_i$ and $\gamma_i(t)$ is an eigenvalue of $B(t)$ ($0\le t \le 1$, $i=1,\cdots, n$). Moreover, by the Gershgorin Circle Theorem, each path $\gamma_i(t)$ ($0\le t \le 1$) is contained in the disc $C_i=\{x: |x-d_i|\le |f_i|_1\}$. If for some $i_1\ne i_2$ the endpoint $\gamma_{i_2}(1)$ falls into $C_{i_1}$, then necessarily $C_{i_1}\cap C_{i_2}\ne \emptyset$. This implies that any Gershgorin disc that is disjoint from all the other Gershgorin discs contains exactly one eigenvalue of $A$.

\begin{remark}
There is a strengthened version of the Gershgorin Circle Theorem that can be found at Wikipedia (http://en.wikipedia.org/wiki/Gershgorin\_circle\_theorem). The above exercise problem's solution is an adaptation of the proof therein. The claim: {\it If the union of $k$ Gershgorin discs is disjoint from the union of the other $(n - k)$ Gershgorin discs, then the former union contains exactly $k$ and the latter $(n - k)$ eigenvalues of $A$}.
\end{remark}
\end{proof}

\subsection{The Multiplicity of Eigenvalues}

\noindent $\blacktriangleright$ (page 327) Show that if $n\equiv 2$ (mod 4), there are no $n\times n$ real matrices $A$, $B$, $C$ not necessarily self-adjoint, such that all their linear combinations (1) have real and distinct eigenvalues.
\begin{proof} Skipped for this version. \end{proof}

\subsection{The Fast Fourier Transform}

There are no exercise problems for this section.

\subsection{The Spectral Radius}

$\bigstar$ {\bf Comments}:
In the textbook (pp. 340), when the author applied Cauchy integral theorem to get formula (20): $\int_{|z|=s}R(z)z^jdz = 2\pi i A^j$, he used the version of Cauchy integral theorem for the outside region of a simple closed curve (see, for example, Gong and Gong \cite{GG07}, Chapter 2, Exercise 8).

\bigskip

\noindent  $\blacktriangleright$ 1. (page 337) Prove that the eigenvalues of an upper triangular matrix are its diagonal entries.
\begin{proof} If $T$ is an upper triangular matrix with diagonal entries $a_1$, $\cdots$, $a_n$, then its characteristic polynomial $p_T(\lambda)=\det (\lambda I - T) = \prod_{i=1}^n(\lambda-a_i)$. So
\[
\mbox{$\lambda_0$ is an eigenvalue of $T$} \Leftrightarrow \mbox{$\det (\lambda_0I - T) = 0$} \Leftrightarrow \prod_{i=1}^n(\lambda_0-a_i)=0 \Leftrightarrow \mbox{$\lambda_0$ is equal to one of $a_1$, $\cdots$, $a_n$.}
\]
\end{proof}

\noindent  $\blacktriangleright$ 2. (page 338) Show that the Euclidean norm of a diagonal matrix is the maximum of the absolute value of its eigenvalues.
\begin{proof} Let $D=\mbox{diag}\{a_1, \cdots, a_n\}$. Then $De_i = (0, \cdots, 0, a_i, 0, \cdots, 0)^T$. So $|\!|De_i|\!|=|a_i|$. This shows $|\!|D|\!| \ge \max_{1\le i\le n}|a_i|$. For any $x\in \mathbb C^n$, $Dx=(a_1x_1, \cdots, a_nx_n)^T$. So $|\!|Dx|\!|=\sqrt{\sum_{i=1}^n|a_ix_i|^2}\le \max_{1\le i\le n}|a_i|\cdot |\!|x|\!|$. So $|\!|D|\!| \le \max_{1\le i \le n}|a_i|$. Combined, we conclude $|\!|D|\!| = \max_{1\le i \le n}|a_i|$. \end{proof}

\noindent  $\blacktriangleright$ 3. (page 339) Prove the analogue of relation (2),
\[
\lim_{j\to\infty} |A^j|^{1/j} = r(A),
\]
when $A$ is a linear mapping of any finite-dimensional {\it normed}, linear space $X$ (see Chapters 14 and 15).
\begin{proof}
By examining the proof for Euclidean space, we see inner product is not really used. All that has been exploited is just norm. So the proof for any finite-dimensional normed linear space is entirely identical to that of finite-dimensional Euclidean space.
\end{proof}

\noindent  $\blacktriangleright$ 4. (page 339) Show that the two definitions are equivalent.
\begin{proof} It suffices to note that a sequence $(A_n)_{n=1}^{\infty}$ converges to $A$ in matrix norm iff each $((A_n)_{ij})_{n=1}^{\infty}$ converges to $A_{ij}$ (Exercise 16 of Chapter 7). \end{proof}

\noindent  $\blacktriangleright$ 5. (page 339) Let $A(z)$ be an analytic matrix function in a domain $G$, invertible at every point of $G$. Show that then $A^{-1}(z)$, too, is an analytic matrix function in $G$.
\begin{proof} By formula (16) of Chapter 5: $D(a_1, \cdots, a_n)=\sum \sigma(p) a_{p_11}\cdots a_{p_nn}$, we conclude the determinant of any analytic matrix (i.e. matrix-valued analytic function) is analytic. By Cramer's rule and $\det A(z)\ne 0$ in $G$, we conclude $A^{-1}(z)$ is also analytic in $G$.  \end{proof}

\noindent  $\blacktriangleright$ 6. (page 339) Show that the Cauchy integral theorem holds for matrix-valued functions.
\begin{proof} By Exercise 4, Cauchy integral theorem for matrix-valued functions is reduced to Cauchy integral theorem for each entry of an analytic matrix.\end{proof}

\subsection{The Lorentz Group}
Skipped for this version.

\subsection{Compactness of the Unit Ball}

\noindent  $\blacktriangleright$ 1. (page 354) (i) Show that a set of functions whose first derivatives are uniformly bounded in $G$ are equicontinuous in $G$.

(ii) Use (i) and the Arzela-Ascoli theorem to prove Theorem 3.

(i)
\begin{proof}
We use the notation of Theorem 3. For simplicity, we assume $G$ is convex so that for any $x, y\in G$, the segment $\{z: z=(1-t)x+ty\}\subset G$. Then by Mean Value Theorem, there exists $c\in (0,1)$ such that
\[
|f(x)-f(y)|=|\nabla f((1-c)x+cy)\cdot (y-x)|\le dm |x-y|,
\]
where $d$ is the dimension of the Euclidean space in which $G$ resides. This shows the elements of $D$ are equi-continuous in $G$.
\end{proof}

(ii)
\begin{proof}
From (i), we know each element of $D$ is uniformly continuous. So they can be extended to $\bar G$, the closure of $G$. Then Theorem 3 is the result of the following version of the Arzela-Ascoli Theorem (see, for example, Yosida \cite{Yosida96}): {\it Let $S$ be a compact metric space, and $C(S)$ the Banach space of (real- or) complex-valued continuous functions $x(s)$ normed by $|\!|x|\!|=\sup_{s\in S}|x(s)|$. Then a sequence $\{x_n(s)\}\subset C(S)$ is relatively compact in $C(S)$ if the following two conditions are satisfied: (a) $\{x_n(s)\}$ is uniformly bounded; (b) $\{x_n(s)\}$ is equi-continuous.}
\end{proof}

\subsection{A Characterization of Commutators}

There are no exercise problems for this section.

\subsection{Liapunov's Theorem}

\noindent  $\blacktriangleright$  1. (page 360) Show that the sums (14) tend to a limit as the size of the subintervals $\Delta_j$ tends to zero. ({\it Hint}: Imitate the proof for the scalar case.)
\begin{proof} This is basically about how to extend the Riemann integral to Banach space valued functions. The theory is essentially the same as the scalar case -- just replace the Euclidean norm with an arbitrary norm. So we omit the details.  \end{proof}

\noindent  $\blacktriangleright$ 2. (page 360) Show that the two definitions are equivalent.
\begin{proof}
It suffices to note $A_n\to A$ in matrix norm if and only if each entry of $A_n$ converges to the corresponding entry of $A$ (see Exercise 7 and formula (51) of Chapter 7).
\end{proof}

\noindent  $\blacktriangleright$ 3. (page 360) Show, using Lemma 4, that the integral (12)
\[
\lim_{T\to\infty} \int_0^T e^{W^*t} e^{Wt} dt
\]
exists.
\begin{proof}
We note for $T'>T$, by Definition 2
\[
\left|\!\left|\int_T^{T'}e^{W^*t}e^{Wt}dt\right|\!\right| \le \int_T^{T'} \left|\!\left|e^{W^*t}\right|\!\right|\left|\!\left|e^{Wt}\right|\!\right|dt.
\]
By Lemma 4, $\lim_{T,T'\to\infty}\int_T^{T'}\left|\!\left|e^{W^*t}\right|\!\right|\left|\!\left|e^{Wt}\right|\!\right|dt = 0$. So by Cauchy's criterion, we conclude the integral (12) exists.
\end{proof}


\subsection{The Jordan Canonical Form}\label{appendix_Jordan_canonical_form}

There are no exercise problems for this section.

\subsection{Numerical Range}

\noindent  $\blacktriangleright$ 1. (page 367) Show that for $A$ normal, equality holds in (2).
\begin{proof}
By Theorem 8 of Chapter 8, we can find an orthonormal basis consisting of eigenvectors of $A$. Let $a_1$, $\cdots$, $a_n$ be the eigenvalues of $A$ (multiplicity counted) with $v_1$, $\cdots$, $v_n$ the corresponding eigenvectors. For any $x\in X$ with $|\!|x|\!|=1$, we can find $\theta_1(x)$, $\cdots$, $\theta_n(x) \in \mathbb C$ such that $x=\sum_{i=1}^n\theta_i(x)v_i$ and $\sum_{i=1}^n|\theta_i(x)|^2=|\!|x|\!|^2=1$. Then
\[
|(Ax,x)|=\left|\sum_{i,j}\theta_i(x)\bar\theta_j(x)(Av_i,v_j)\right|= \left|\sum_{i,j}\theta_i(x)\bar\theta_j(x)(a_iv_i,v_j)\right| = \left|\sum_{i=1}^na_i|\theta_i(x)|^2\right| \le \max_{1\le i\le n}|a_i|=r(A).
\]
Combined with (2), we conclude $r(A)=w(A)$.
\end{proof}

\noindent  $\blacktriangleright$ 2. (page 367) Show that for $A$ normal,
\[
w(A) = |\!| A |\!|.
\]
\begin{proof}
By definition $|\!|A|\!| = \sup_{|\!|x|\!|=1}|\!|Ax|\!|$. Using the notation in the solution to Exercise 1, we have
\[
A x = \sum_i\theta_i(x)Av_i = \sum_{i}\theta_i(x)a_iv_i.
\]
So $|\!|Ax|\!|=\sqrt{\sum_{i=1}^n|a_i|^2|\theta_i(x)|^2}\le r(A)=w(A)$, where the last equality comes from Exercise 1. This implies $|\!|A|\!|\le w(A)$. By Theorem 13 (ii) of Chapter 7, $w(A) \le |\!|A|\!|$. Combined, we conclude $w(A)=|\!|A|\!|$.
\end{proof}

\noindent  $\blacktriangleright$ 3. (page 369) Verify (7) and (8).
\begin{proof}
To verify (7), we note
\[
\prod_k(1-r_kz)=\prod_k(\bar r_k-z)\cdot \prod_kr_k = \prod_k(\bar r_k-z) \cdot e^{\frac{2\pi i}{n}\cdot \sum_{k=1}^nk} = \prod_k(\bar r_k-z) \cdot e^{(n+1)\pi i}=(-1)^{n+1}\prod_k(\bar r_k-z).
\]
Since $(\bar r_k)^n=1$, $\bar r_1$, $\cdots$, $\bar r_n$ are a permutation of $r_1$, $\cdots$, $r_n$. So
\[
\prod_k(\bar r_k-z) = (-1)^n(z^n-1).
\]
Combined, we conclude $(1-z^n)=\prod_{k}(1-r_kz)$. To verify (8), we use (7) to get
\[
\frac{1}{n}\sum_j \prod_{k\ne j} (1-r_kz) = \frac{1}{n}\sum_j\frac{1-z^n}{1-r_jz}.
\]
$\sum_j\frac{1}{1-r_jz}$ is a rational function over the complex plane $\mathbb C$, which can be assumed to have the form $\frac{P(z)}{Q(z)}$ with $P(z)$ and $Q(z)$ being polynomials without common factors. Since $r_1$, $\cdots$, $r_n$ are singularities of order 1 for $\sum_j\frac{1}{1-r_jz}$, we conclude $Q(z)=\prod_k (1-r_kz)=1-z^n$, up to a constant factor. Since $\sum_j\frac{1}{1-r_jz}$ has no zeros on the complex plane, we conclude $P(z)$ must be a constant. Combined, we conclude
\[
\sum_j\frac{1}{1-r_jz} = \frac{C}{1-z^n}
\]
for some constant $C$. By letting $z\to 0$, we can see $C=n$. This finishes the verification of (8).
\end{proof}

\noindent  $\blacktriangleright$ 4. (page 370) Determine the numerical range of $A= \left(
                                                                                      \begin{array}{cc}
                                                                                        1 & 1 \\
                                                                                        0 & 1 \\
                                                                                      \end{array}
                                                                                    \right)$ and of $A^2 = \left(
                                                                                                             \begin{array}{cc}
                                                                                                               1 & 2 \\
                                                                                                               0 & 1 \\
                                                                                                             \end{array}
                                                                                                           \right)$.
\begin{solution}
If $x=\left[\begin{matrix}x_1\\x_2\end{matrix}\right]$, $(Ax,x)=x_1^2+x_2^2+x_1x_2$. If $x_1^2+x_2^2=1$, we have $(Ax,x)= 1 + x_1\cdot \mbox{sign}(x_2)\sqrt{1-x_1^2}$. Calculus shows $f(\xi)=\xi\sqrt{1-\xi^2}$ $(-1\le \xi \le 1)$ achieves its maximum value $\frac{1}{2}$ at $\xi_0=\frac{\sqrt{2}}{2}$. So $w(A)=1+\frac{1}{2}=\frac{3}{2}$. Similarly, plain calculation shows $w(A^2)=2$.
\end{solution}


\begin{thebibliography}{99}

\bibitem{EGRS59} M. Engeli, T. Ginsburg, H. Rutishauser and E. Stiefel. {\it Refined iterative methods for computation of the solution and the eigenvalues of self-adjoint boundary value problems}, Birkhauser Verlag, Basel/Stuttgart, 1959.

\bibitem{GV96} Gene H. Golub and Charles F. van Loan. {\it Matrix computation}, 3rd Edition. Johns Hopkins University Press, 1996.


\bibitem{GG07} Gong Sheng and Gong Youhong. {\it Concise complex analysis}, Revised Edition, World Scientific, 2007.

\bibitem{Greene11} William H. Greene. {\it Econometric Analysis}, 7th ed., Prentice Hall, 2011.

\bibitem{Keener00} James P. Keener. {\it Principles of applied mathematics: Transformation and approximation}, revised edition. Westview Press, 2000.

\bibitem{Lan02a}蓝以中：《高等代数简明教程（上册）》。北京大学出版社，北京，2002.8。[Lan Yi-Zhong. {\it A concise course of advanced algebra} (in Chinese), Volume 1, Peking University Press, Beijing, 2002.8.]

\bibitem{Lax02} P. Lax. {\it Functional analysis},
Wiley-Interscience, 2002.

\bibitem{Lax07} P. Lax. {\it Linear algebra and its applications}, 2nd Edition, Wiley-Interscience, 2007.

\bibitem{LJTTJ96} Steven Huss-Lederman, Elaine M. Jacobson, Anna Tsao, Thomas Turnbull, Jeremy R. Johnson.
Implementation of Strassen's algorithm for matrix multiplication. Proceedings of the 1996 ACM/IEEE conference on Supercomputing (CDROM), p.32-es, January 01-01, 1996, Pittsburgh, Pennsylvania, United States.

\bibitem{Munkres97} J. Munkres. {\it Analysis on manifolds},
Westview Press, 1997.

\bibitem{Papadrakakis82} M. Papadrakakis. A family of methods with three-term recursion formulae. {\it International Journal for Numerical Methods in Engineering}, Vol. 18, 1785-1799 (1982).

\bibitem{Qiu96a}丘维声：《高等代数（上册）》，高等教育出版社，北京，1996。[Qiu Wei-Sheng. {\it Advanced algebra} (in Chinese), Volume 1, Higher Education Press, Beijing, 1996.]

\bibitem{Spivak65} Michael Spivak. {\it Calculus on manifolds: A modern approach to classical theorems of advanced calculus}. Perseus Books Publishing, 1965.

\bibitem{Yosida96} Kosaku Yosida. {\it Functional analysis}, 6th Edition. Springer, 1996.	

\end{thebibliography}
\end{document}

